Actual source code: redundant.c
1: #define PETSCKSP_DLL
3: /*
4: This file defines a "solve the problem redundantly on each subgroup of processors" preconditioner.
5: */
6: #include "private/pcimpl.h"
7: #include "petscksp.h"
9: typedef struct {
10: PC pc; /* actual preconditioner used on each subcommunicator (process group) */
11: Vec xsub,ysub; /* vectors of a subcommunicator that hold pieces of the parallel vectors of pc->comm */
12: Vec xdup,ydup; /* parallel vectors that concatenate xsub/ysub across subcommunicators to facilitate vector scattering */
13: Mat pmats; /* matrix and optional preconditioner matrix that belong to a subcommunicator */
14: VecScatter scatterin,scatterout; /* scatters used to move all values to each processor group (subcommunicator) and back */
15: PetscTruth useparallelmat;
16: MPI_Comm subcomm; /* processors belonging to a subcommunicator implement a PC in parallel */
17: MPI_Comm dupcomm; /* processors of pc->comm with their ranks remapped so that xdup/ydup
18: have contiguous ranks, i.e. xsub/ysub appended in color order */
19: PetscInt nsubcomm; /* number of subcommunicators, which equals the number of redundant matrix systems */
20: PetscInt color; /* color (subcommunicator index) of this processor */
21: } PC_Redundant;
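/*
   Editorial note (not part of the original source), giving a concrete picture of the layout above,
   assuming nsubcomm = 2 and a global vector x of size m distributed over pc->comm.  xdup lives on
   dupcomm and holds nsubcomm full copies of x, one per color, appended back to back:

       xdup = [ copy of x for color 0 | copy of x for color 1 ]      (global length nsubcomm*m)

   xsub is created with VecCreateMPIWithArray() (with a NULL array) on subcomm; in PCApply_Redundant()
   the local array of xdup is placed into it with VecPlaceArray(), so each subcommunicator sees one
   complete copy of x.  ysub/ydup play the same roles for the output of the inner PCApply().
*/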
23: #include "src/sys/viewer/impls/ascii/asciiimpl.h"
24: #include "petscfix.h"
25: #include <stdarg.h>
27: PetscErrorCode PetscViewerRestoreSubcomm(PetscViewer viewer,PetscViewer *outviewer)
28: {
30: PetscMPIInt size;
35: MPI_Comm_size(viewer->comm,&size);
36: if (size == 1) {
37: PetscObjectDereference((PetscObject)viewer);
38: if (outviewer) *outviewer = 0;
39: } else if (viewer->ops->restoresingleton) {
40: (*viewer->ops->restoresingleton)(viewer,outviewer);
41: }
42: return(0);
43: }
47: PetscErrorCode PetscViewerDestroy_ASCIIh(PetscViewer viewer)
48: {
49: PetscMPIInt rank;
50: PetscErrorCode ierr;
51: PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
52: PetscViewerLink *vlink;
53: PetscTruth flg;
56: if (vascii->sviewer) {
57: SETERRQ(PETSC_ERR_ORDER,"ASCII PetscViewer destroyed before restoring singleton PetscViewer");
58: }
59: MPI_Comm_rank(viewer->comm,&rank);
60: if (!rank && vascii->fd != stderr && vascii->fd != stdout) {
61: if (vascii->fd) fclose(vascii->fd);
62: if (vascii->storecompressed) {
63: char par[PETSC_MAX_PATH_LEN],buf[PETSC_MAX_PATH_LEN];
64: FILE *fp;
65: PetscStrcpy(par,"gzip ");
66: PetscStrcat(par,vascii->filename);
67: #if defined(PETSC_HAVE_POPEN)
68: PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,par,"r",&fp);
69: if (fgets(buf,1024,fp)) {
70: SETERRQ2(PETSC_ERR_LIB,"Error from compression command %s\n%s",par,buf);
71: }
72: PetscPClose(PETSC_COMM_SELF,fp);
73: #else
74: SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
75: #endif
76: }
77: }
78: PetscStrfree(vascii->filename);
79: PetscFree(vascii);
81: /* remove the viewer from the list in the MPI Communicator */
82: if (Petsc_Viewer_keyval == MPI_KEYVAL_INVALID) {
83: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelViewer,&Petsc_Viewer_keyval,(void*)0);
84: }
86: MPI_Attr_get(viewer->comm,Petsc_Viewer_keyval,(void**)&vlink,(PetscMPIInt*)&flg);
87: if (flg) {
88: if (vlink && vlink->viewer == viewer) {
89: MPI_Attr_put(viewer->comm,Petsc_Viewer_keyval,vlink->next);
90: PetscFree(vlink);
91: } else {
92: while (vlink && vlink->next) {
93: if (vlink->next->viewer == viewer) {
94: PetscViewerLink *nv = vlink->next;
95: vlink->next = vlink->next->next;
96: PetscFree(nv);
97: }
98: vlink = vlink->next;
99: }
100: }
101: }
102: return(0);
103: }
107: PetscErrorCode PetscViewerDestroy_ASCII_Subcomm(PetscViewer viewer)
108: {
109: PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
110: PetscErrorCode ierr;
112: PetscViewerRestoreSubcomm(vascii->bviewer,&viewer);
113: return(0);
114: }
118: PetscErrorCode PetscViewerFlush_ASCII_Subcomm_0(PetscViewer viewer)
119: {
120: PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
123: fflush(vascii->fd);
124: return(0);
125: }
129: PetscErrorCode PetscViewerGetSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
130: {
131: PetscMPIInt rank;
132: PetscErrorCode ierr;
133: PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data,*ovascii;
134: const char *name;
137: if (vascii->sviewer) {
138: SETERRQ(PETSC_ERR_ORDER,"Subcomm already obtained from PetscViewer and not restored");
139: }
140: /* PetscViewerCreate(PETSC_COMM_SELF,outviewer); */
141: PetscViewerCreate(subcomm,outviewer);
142: PetscViewerSetType(*outviewer,PETSC_VIEWER_ASCII);
143: ovascii = (PetscViewer_ASCII*)(*outviewer)->data;
144: ovascii->fd = vascii->fd;
145: ovascii->tab = vascii->tab;
147: vascii->sviewer = *outviewer;
149: (*outviewer)->format = viewer->format;
150: (*outviewer)->iformat = viewer->iformat;
152: PetscObjectGetName((PetscObject)viewer,&name);
153: PetscObjectSetName((PetscObject)(*outviewer),name);
155: MPI_Comm_rank(viewer->comm,&rank);
156: ((PetscViewer_ASCII*)((*outviewer)->data))->bviewer = viewer;
157: (*outviewer)->ops->destroy = PetscViewerDestroy_ASCII_Subcomm;
158: if (rank) {
159: (*outviewer)->ops->flush = 0;
160: } else {
161: (*outviewer)->ops->flush = PetscViewerFlush_ASCII_Subcomm_0;
162: }
163: return(0);
164: }
168: PetscErrorCode PetscViewerRestoreSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
169: {
170: PetscErrorCode ierr;
171: PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)(*outviewer)->data;
172: PetscViewer_ASCII *ascii = (PetscViewer_ASCII *)viewer->data;
175: if (!ascii->sviewer) {
176: SETERRQ(PETSC_ERR_ORDER,"Subcomm never obtained from PetscViewer");
177: }
178: if (ascii->sviewer != *outviewer) {
179: SETERRQ(PETSC_ERR_ARG_WRONG,"This PetscViewer did not generate singleton");
180: }
182: ascii->sviewer = 0;
183: vascii->fd = stdout;
184: (*outviewer)->ops->destroy = PetscViewerDestroy_ASCIIh;
185: PetscViewerDestroy(*outviewer);
186: PetscViewerFlush(viewer);
187: return(0);
188: }
192: static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
193: {
194: PC_Redundant *red = (PC_Redundant*)pc->data;
196: PetscMPIInt rank;
197: PetscTruth iascii,isstring;
198: PetscViewer sviewer,subviewer;
199: PetscInt color = red->color;
202: MPI_Comm_rank(pc->comm,&rank);
203: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
204: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);
205: if (iascii) {
206: PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");
207: PetscViewerGetSubcomm_ASCII(viewer,red->pc->comm,&subviewer);
208: /* PetscViewerGetSingleton(viewer,&sviewer); */
209: if (!color) {
210: PetscViewerASCIIPushTab(viewer);
211: PCView(red->pc,subviewer);
212: PetscViewerASCIIPopTab(viewer);
213: }
214: /* PetscViewerRestoreSingleton(viewer,&sviewer); */
215: PetscViewerRestoreSubcomm_ASCII(viewer,red->pc->comm,&subviewer);
216: } else if (isstring) {
217: PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");
218: PetscViewerGetSingleton(viewer,&sviewer);
219: if (!rank) {
220: PCView(red->pc,sviewer);
221: }
222: PetscViewerRestoreSingleton(viewer,&sviewer);
223: } else {
224: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
225: }
226: return(0);
227: }
229: #include "include/private/matimpl.h"
230: #include "private/vecimpl.h"
231: #include "src/mat/impls/aij/mpi/mpiaij.h"
232: #include "src/mat/impls/aij/seq/aij.h"
234: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
235: PetscInt nzlocal,nsends,nrecvs;
236: PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
237: PetscScalar *sbuf_a,**rbuf_a;
238: PetscErrorCode (*MatDestroy)(Mat);
239: } Mat_Redundant;
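/*
   Editorial sketch (not part of the original source): the reuse pattern Mat_Redundant supports.
   On the first PCSetUp() the redundant matrix is gathered with MAT_INITIAL_MATRIX and this struct
   is attached to it inside a PetscContainer; on later set-ups with the same nonzero pattern only
   the numerical values are re-communicated:

       MatGetRedundantMatrix_AIJ(pmat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);
       (matrix values change, same nonzero pattern)
       MatGetRedundantMatrix_AIJ(pmat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);
*/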
243: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
244: {
245: PetscErrorCode ierr;
246: Mat_Redundant *redund=(Mat_Redundant*)ptr;
247: PetscInt i;
250: PetscFree(redund->send_rank);
251: PetscFree(redund->sbuf_j);
252: PetscFree(redund->sbuf_a);
253: for (i=0; i<redund->nrecvs; i++){
254: PetscFree(redund->rbuf_j[i]);
255: PetscFree(redund->rbuf_a[i]);
256: }
257: PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);
258: PetscFree(redund);
259: return(0);
260: }
264: PetscErrorCode MatDestroy_MatRedundant(Mat A)
265: {
266: PetscErrorCode ierr;
267: PetscContainer container;
268: Mat_Redundant *redund=PETSC_NULL;
271: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
272: if (container) {
273: PetscContainerGetPointer(container,(void **)&redund);
274: } else {
275: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
276: }
277: A->ops->destroy = redund->MatDestroy;
278: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
279: (*A->ops->destroy)(A);
280: PetscContainerDestroy(container);
281: return(0);
282: }
286: PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
287: {
288: PetscMPIInt rank,size;
289: MPI_Comm comm=mat->comm;
291: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
292: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
293: PetscInt *rowrange=mat->rmap.range;
294: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
295: Mat A=aij->A,B=aij->B,C=*matredundant;
296: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
297: PetscScalar *sbuf_a;
298: PetscInt nzlocal=a->nz+b->nz;
299: PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
300: PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
301: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
302: PetscScalar *vals,*aworkA,*aworkB;
303: PetscMPIInt tag1,tag2,tag3,imdex;
304: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
305: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
306: MPI_Status recv_status,*send_status;
307: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
308: PetscInt **rbuf_j=PETSC_NULL;
309: PetscScalar **rbuf_a=PETSC_NULL;
310: Mat_Redundant *redund=PETSC_NULL;
311: PetscContainer container;
314: MPI_Comm_rank(comm,&rank);
315: MPI_Comm_size(comm,&size);
317: if (reuse == MAT_REUSE_MATRIX) {
318: MatGetSize(C,&M,&N);
319: if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
320: MatGetLocalSize(C,&M,&N);
321: if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
322: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
323: if (container) {
324: PetscContainerGetPointer(container,(void **)&redund);
325: } else {
326: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
327: }
328: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
330: nsends = redund->nsends;
331: nrecvs = redund->nrecvs;
332: send_rank = redund->send_rank; recv_rank = send_rank + size;
333: sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
334: sbuf_j = redund->sbuf_j;
335: sbuf_a = redund->sbuf_a;
336: rbuf_j = redund->rbuf_j;
337: rbuf_a = redund->rbuf_a;
338: }
340: if (reuse == MAT_INITIAL_MATRIX){
341: PetscMPIInt subrank,subsize;
342: PetscInt nleftover,np_subcomm;
343: /* get the destination processors' id send_rank, nsends and nrecvs */
344: MPI_Comm_rank(subcomm,&subrank);
345: MPI_Comm_size(subcomm,&subsize);
346: PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
347: recv_rank = send_rank + size;
348: np_subcomm = size/nsubcomm;
349: nleftover = size - nsubcomm*np_subcomm;
350: nsends = 0; nrecvs = 0;
351: for (i=0; i<size; i++){ /* i=rank*/
352: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
353: send_rank[nsends] = i; nsends++;
354: recv_rank[nrecvs++] = i;
355: }
356: }
357: if (rank >= size - nleftover){/* this proc is a leftover processor */
358: i = size-nleftover-1;
359: j = 0;
360: while (j < nsubcomm - nleftover){
361: send_rank[nsends++] = i;
362: i--; j++;
363: }
364: }
366: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
367: for (i=0; i<nleftover; i++){
368: recv_rank[nrecvs++] = size-nleftover+i;
369: }
370: }
372: /* allocate sbuf_j, sbuf_a */
373: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
374: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
375: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
376: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
377:
378: /* copy mat's local entries into the buffers */
379: if (reuse == MAT_INITIAL_MATRIX){
380: rownz_max = 0;
381: rptr = sbuf_j;
382: cols = sbuf_j + rend-rstart + 1;
383: vals = sbuf_a;
384: rptr[0] = 0;
385: for (i=0; i<rend-rstart; i++){
386: row = i + rstart;
387: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
388: ncols = nzA + nzB;
389: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
390: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
391: /* load the column indices for this row into cols */
392: lwrite = 0;
393: for (l=0; l<nzB; l++) {
394: if ((ctmp = bmap[cworkB[l]]) < cstart){
395: vals[lwrite] = aworkB[l];
396: cols[lwrite++] = ctmp;
397: }
398: }
399: for (l=0; l<nzA; l++){
400: vals[lwrite] = aworkA[l];
401: cols[lwrite++] = cstart + cworkA[l];
402: }
403: for (l=0; l<nzB; l++) {
404: if ((ctmp = bmap[cworkB[l]]) >= cend){
405: vals[lwrite] = aworkB[l];
406: cols[lwrite++] = ctmp;
407: }
408: }
409: vals += ncols;
410: cols += ncols;
411: rptr[i+1] = rptr[i] + ncols;
412: if (rownz_max < ncols) rownz_max = ncols;
413: }
414: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
415: } else { /* only copy matrix values into sbuf_a */
416: rptr = sbuf_j;
417: vals = sbuf_a;
418: rptr[0] = 0;
419: for (i=0; i<rend-rstart; i++){
420: row = i + rstart;
421: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
422: ncols = nzA + nzB;
423: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
424: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
425: lwrite = 0;
426: for (l=0; l<nzB; l++) {
427: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
428: }
429: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
430: for (l=0; l<nzB; l++) {
431: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
432: }
433: vals += ncols;
434: rptr[i+1] = rptr[i] + ncols;
435: }
436: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
438: /* send nzlocal to others, and receive others' nzlocal */
439: /*--------------------------------------------------*/
440: if (reuse == MAT_INITIAL_MATRIX){
441: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
442: s_waits2 = s_waits3 + nsends;
443: s_waits1 = s_waits2 + nsends;
444: r_waits1 = s_waits1 + nsends;
445: r_waits2 = r_waits1 + nrecvs;
446: r_waits3 = r_waits2 + nrecvs;
447: } else {
448: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
449: r_waits3 = s_waits3 + nsends;
450: }
452: PetscObjectGetNewTag((PetscObject)mat,&tag3);
453: if (reuse == MAT_INITIAL_MATRIX){
454: /* get new tags to keep the communication clean */
455: PetscObjectGetNewTag((PetscObject)mat,&tag1);
456: PetscObjectGetNewTag((PetscObject)mat,&tag2);
457: PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
458: rbuf_nz = sbuf_nz + nsends;
459:
460: /* post receives of other's nzlocal */
461: for (i=0; i<nrecvs; i++){
462: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
463: }
464: /* send nzlocal to others */
465: for (i=0; i<nsends; i++){
466: sbuf_nz[i] = nzlocal;
467: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
468: }
469: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
470: count = nrecvs;
471: while (count) {
472: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
473: recv_rank[imdex] = recv_status.MPI_SOURCE;
474: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
475: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
477: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
478: rbuf_nz[imdex] += i + 2;
479: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
480: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
481: count--;
482: }
483: /* wait on sends of nzlocal */
484: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
485: /* send mat->i,j to others, and receive theirs */
486: /*------------------------------------------------*/
487: for (i=0; i<nsends; i++){
488: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
489: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
490: }
491: /* wait on receives of mat->i,j */
492: /*------------------------------*/
493: count = nrecvs;
494: while (count) {
495: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
496: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
497: count--;
498: }
499: /* wait on sends of mat->i,j */
500: /*---------------------------*/
501: if (nsends) {
502: MPI_Waitall(nsends,s_waits2,send_status);
503: }
504: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
506: /* post receives, send and receive mat->a */
507: /*----------------------------------------*/
508: for (imdex=0; imdex<nrecvs; imdex++) {
509: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
510: }
511: for (i=0; i<nsends; i++){
512: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
513: }
514: count = nrecvs;
515: while (count) {
516: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
517: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
518: count--;
519: }
520: if (nsends) {
521: MPI_Waitall(nsends,s_waits3,send_status);
522: }
524: PetscFree2(s_waits3,send_status);
525:
526: /* create redundant matrix */
527: /*-------------------------*/
528: if (reuse == MAT_INITIAL_MATRIX){
529: /* compute rownz_max for preallocation */
530: for (imdex=0; imdex<nrecvs; imdex++){
531: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
532: rptr = rbuf_j[imdex];
533: for (i=0; i<j; i++){
534: ncols = rptr[i+1] - rptr[i];
535: if (rownz_max < ncols) rownz_max = ncols;
536: }
537: }
538:
539: MatCreate(subcomm,&C);
540: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
541: MatSetFromOptions(C);
542: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
543: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
544: } else {
545: C = *matredundant;
546: }
548: /* insert local matrix entries */
549: rptr = sbuf_j;
550: cols = sbuf_j + rend-rstart + 1;
551: vals = sbuf_a;
552: for (i=0; i<rend-rstart; i++){
553: row = i + rstart;
554: ncols = rptr[i+1] - rptr[i];
555: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
556: vals += ncols;
557: cols += ncols;
558: }
559: /* insert received matrix entries */
560: for (imdex=0; imdex<nrecvs; imdex++){
561: rstart = rowrange[recv_rank[imdex]];
562: rend = rowrange[recv_rank[imdex]+1];
563: rptr = rbuf_j[imdex];
564: cols = rbuf_j[imdex] + rend-rstart + 1;
565: vals = rbuf_a[imdex];
566: for (i=0; i<rend-rstart; i++){
567: row = i + rstart;
568: ncols = rptr[i+1] - rptr[i];
569: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
570: vals += ncols;
571: cols += ncols;
572: }
573: }
574: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
575: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
576: MatGetSize(C,&M,&N);
577: if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
578: if (reuse == MAT_INITIAL_MATRIX){
579: PetscContainer container;
580: *matredundant = C;
581: /* create a supporting struct and attach it to C for reuse */
582: PetscNew(Mat_Redundant,&redund);
583: PetscContainerCreate(PETSC_COMM_SELF,&container);
584: PetscContainerSetPointer(container,redund);
585: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
586: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
587:
588: redund->nzlocal = nzlocal;
589: redund->nsends = nsends;
590: redund->nrecvs = nrecvs;
591: redund->send_rank = send_rank;
592: redund->sbuf_nz = sbuf_nz;
593: redund->sbuf_j = sbuf_j;
594: redund->sbuf_a = sbuf_a;
595: redund->rbuf_j = rbuf_j;
596: redund->rbuf_a = rbuf_a;
598: redund->MatDestroy = C->ops->destroy;
599: C->ops->destroy = MatDestroy_MatRedundant;
600: }
601: return(0);
602: }
606: static PetscErrorCode PCSetUp_Redundant(PC pc)
607: {
608: PC_Redundant *red = (PC_Redundant*)pc->data;
610: PetscInt mstart,mend,mlocal,m;
611: PetscMPIInt size;
612: MatReuse reuse = MAT_INITIAL_MATRIX;
613: MatStructure str = DIFFERENT_NONZERO_PATTERN;
614: MPI_Comm comm;
615: Vec vec;
617: PetscInt mlocal_sub;
618: PetscMPIInt subsize,subrank;
619: PetscInt rstart_sub,rend_sub,mloc_sub;
622: PetscObjectGetComm((PetscObject)pc->pmat,&comm);
623: MatGetVecs(pc->pmat,&vec,0);
624: PCSetFromOptions(red->pc);
625: VecGetSize(vec,&m);
626: if (!pc->setupcalled) {
627: /* create working vectors xsub/ysub and xdup/ydup */
628: VecGetLocalSize(vec,&mlocal);
629: VecGetOwnershipRange(vec,&mstart,&mend);
631: /* get local size of xsub/ysub */
632: MPI_Comm_size(red->subcomm,&subsize);
633: MPI_Comm_rank(red->subcomm,&subrank);
634: rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
635: if (subrank+1 < subsize){
636: rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
637: } else {
638: rend_sub = m;
639: }
640: mloc_sub = rend_sub - rstart_sub;
641: VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);
642: /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
643: VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);
645: /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be placed into it.
646: Note: we use communicator dupcomm, not pc->comm! */
647: VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);
648: VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);
649:
650: /* create vec scatters */
651: if (!red->scatterin){
652: IS is1,is2;
653: PetscInt *idx1,*idx2,i,j,k;
654: PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);
655: idx2 = idx1 + red->nsubcomm*mlocal;
656: j = 0;
657: for (k=0; k<red->nsubcomm; k++){
658: for (i=mstart; i<mend; i++){
659: idx1[j] = i;
660: idx2[j++] = i + m*k;
661: }
662: }
663: ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);
664: ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);
665: VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);
666: ISDestroy(is1);
667: ISDestroy(is2);
669: ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);
670: ISCreateStride(comm,mlocal,mstart,1,&is2);
671: VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);
672: ISDestroy(is1);
673: ISDestroy(is2);
674: PetscFree(idx1);
675: }
676: }
677: VecDestroy(vec);
679: /* if the preconditioner matrix set by the user is sequential then we do not need to gather the parallel matrix */
680: MPI_Comm_size(comm,&size);
681: if (size == 1) {
682: red->useparallelmat = PETSC_FALSE;
683: }
685: if (red->useparallelmat) {
686: if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
687: /* destroy old matrices */
688: if (red->pmats) {
689: MatDestroy(red->pmats);
690: }
691: } else if (pc->setupcalled == 1) {
692: reuse = MAT_REUSE_MATRIX;
693: str = SAME_NONZERO_PATTERN;
694: }
695:
696: /* grab the parallel matrix and put it onto the processors of a subcommunicator */
697: /*--------------------------------------------------------------------------*/
698: VecGetLocalSize(red->ysub,&mlocal_sub);
699: MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);
700: /* give the PC of the subcommunicator its operators */
701: PCSetOperators(red->pc,red->pmats,red->pmats,str);
702: } else {
703: PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);
704: }
705: PCSetFromOptions(red->pc);
706: PCSetUp(red->pc);
707: return(0);
708: }
712: static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
713: {
714: PC_Redundant *red = (PC_Redundant*)pc->data;
716: PetscScalar *array;
719: /* scatter x to xdup */
720: VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);
721: VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);
722:
723: /* place xdup's local array into xsub */
724: VecGetArray(red->xdup,&array);
725: VecPlaceArray(red->xsub,(const PetscScalar*)array);
727: /* apply preconditioner on each processor */
728: PCApply(red->pc,red->xsub,red->ysub);
729: VecResetArray(red->xsub);
730: VecRestoreArray(red->xdup,&array);
731:
732: /* place ysub's local array into ydup */
733: VecGetArray(red->ysub,&array);
734: VecPlaceArray(red->ydup,(const PetscScalar*)array);
736: /* scatter ydup to y */
737: VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);
738: VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);
739: VecResetArray(red->ydup);
740: VecRestoreArray(red->ysub,&array);
741: return(0);
742: }
746: static PetscErrorCode PCDestroy_Redundant(PC pc)
747: {
748: PC_Redundant *red = (PC_Redundant*)pc->data;
752: if (red->scatterin) {VecScatterDestroy(red->scatterin);}
753: if (red->scatterout) {VecScatterDestroy(red->scatterout);}
754: if (red->ysub) {VecDestroy(red->ysub);}
755: if (red->xsub) {VecDestroy(red->xsub);}
756: if (red->xdup) {VecDestroy(red->xdup);}
757: if (red->ydup) {VecDestroy(red->ydup);}
758: if (red->pmats) {
759: MatDestroy(red->pmats);
760: }
763: PCDestroy(red->pc);
764: PetscFree(red);
765: return(0);
766: }
770: static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
771: {
773: return(0);
774: }
779: PetscErrorCode PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
780: {
781: PC_Redundant *red = (PC_Redundant*)pc->data;
785: red->scatterin = in;
786: red->scatterout = out;
787: PetscObjectReference((PetscObject)in);
788: PetscObjectReference((PetscObject)out);
789: return(0);
790: }
795: /*@
796: PCRedundantSetScatter - Sets the scatter used to copy values into the
797: redundant local solve and the scatter to move them back into the global
798: vector.
800: Collective on PC
802: Input Parameters:
803: + pc - the preconditioner context
804: . in - the scatter to move the values in
805: - out - the scatter to move them out
807: Level: advanced
809: .keywords: PC, redundant solve
810: @*/
811: PetscErrorCode PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
812: {
813: PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
819: PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);
820: if (f) {
821: (*f)(pc,in,out);
822: }
823: return(0);
824: }
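/*
   Editorial sketch (not part of the original source): supplying custom scatters instead of the
   ones PCSetUp_Redundant() builds.  Both scatters are assumed to have been created by the caller,
   e.g. with VecScatterCreate() as in PCSetUp_Redundant(); "in" must move the global vector into
   xdup and "out" must move ydup back into the global vector.

       VecScatter in,out;               (hypothetical, created by the caller)
       PCRedundantSetScatter(pc,in,out);
*/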
829: PetscErrorCode PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
830: {
831: PC_Redundant *red = (PC_Redundant*)pc->data;
834: *innerpc = red->pc;
835: return(0);
836: }
841: /*@
842: PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
844: Not Collective
846: Input Parameter:
847: . pc - the preconditioner context
849: Output Parameter:
850: . innerpc - the sequential PC
852: Level: advanced
854: .keywords: PC, redundant solve
855: @*/
856: PetscErrorCode PCRedundantGetPC(PC pc,PC *innerpc)
857: {
858: PetscErrorCode ierr,(*f)(PC,PC*);
863: PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);
864: if (f) {
865: (*f)(pc,innerpc);
866: }
867: return(0);
868: }
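/*
   Editorial sketch (not part of the original source): changing the inner preconditioner from the
   default PCLU chosen in PCCreate_Redundant() below; equivalent to the run-time option
   -redundant_pc_type cholesky.

       PC inner;
       PCRedundantGetPC(pc,&inner);
       PCSetType(inner,PCCHOLESKY);
*/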
873: PetscErrorCode PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
874: {
875: PC_Redundant *red = (PC_Redundant*)pc->data;
878: if (mat) *mat = red->pmats;
879: if (pmat) *pmat = red->pmats;
880: return(0);
881: }
886: /*@
887: PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
889: Not Collective
891: Input Parameter:
892: . pc - the preconditioner context
894: Output Parameters:
895: + mat - the matrix
896: - pmat - the (possibly different) preconditioner matrix
898: Level: advanced
900: .keywords: PC, redundant solve
901: @*/
902: PetscErrorCode PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
903: {
904: PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
910: PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);
911: if (f) {
912: (*f)(pc,mat,pmat);
913: }
914: return(0);
915: }
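/*
   Editorial sketch (not part of the original source): inspecting the redundant matrix that
   PCSetUp_Redundant() gathered onto this process's subcommunicator; only meaningful after
   PCSetUp() has run with a parallel preconditioner matrix.

       Mat rmat;
       PetscInt M,N;
       PCRedundantGetOperators(pc,&rmat,PETSC_NULL);
       MatGetSize(rmat,&M,&N);          (global size equals that of the original pc->pmat)
*/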
917: /* -------------------------------------------------------------------------------------*/
918: /*MC
919: PCREDUNDANT - Runs a preconditioner for the entire problem on each process group (subcommunicator)
922: Options for the inner (redundant) preconditioner can be set with -redundant_pc_xxx; the number of redundant subcommunicators can be set with -nsubcomm <n>
924: Level: intermediate
927: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
928: PCRedundantGetPC(), PCRedundantGetOperators()
930: M*/
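/*
   Editorial usage sketch (not part of the original source).  The KSP/PC calls are standard PETSc
   API of this release; the variable names A, b, x are hypothetical.

       KSP ksp;
       PC  pc;
       KSPCreate(PETSC_COMM_WORLD,&ksp);
       KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
       KSPGetPC(ksp,&pc);
       PCSetType(pc,PCREDUNDANT);        (or -pc_type redundant on the command line)
       KSPSetFromOptions(ksp);           (picks up -nsubcomm <n> and -redundant_pc_xxx options)
       KSPSolve(ksp,b,x);
       KSPDestroy(ksp);
*/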
935: PetscErrorCode PCCreate_Redundant(PC pc)
936: {
938: PC_Redundant *red;
939: const char *prefix;
940: PetscInt nsubcomm,np_subcomm,nleftover,i,j,color;
941: PetscMPIInt rank,size,subrank,*subsize,duprank;
942: MPI_Comm subcomm=0,dupcomm=0;
945: PetscNew(PC_Redundant,&red);
946: PetscLogObjectMemory(pc,sizeof(PC_Redundant));
947: red->useparallelmat = PETSC_TRUE;
948:
949: MPI_Comm_rank(pc->comm,&rank);
950: MPI_Comm_size(pc->comm,&size);
951: nsubcomm = size;
952: PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);
953: if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
955: /*--------------------------------------------------------------------------------------------------
956: To avoid scattering data from the subcommunicators back to the original comm, we create the
957: subcommunicators by assigning processes to them one at a time (round-robin).
958: An example: size=4, nsubcomm=3
959: pc->comm:
960: rank: [0] [1] [2] [3]
961: color: 0 1 2 0
963: subcomm:
964: subrank: [0] [0] [0] [1]
966: dupcomm:
967: duprank: [0] [2] [3] [1]
969: Here, subcomm[color = 0] has subsize=2, owns processes [0] and [3]
970: subcomm[color = 1] has subsize=1, owns process [1]
971: subcomm[color = 2] has subsize=1, owns process [2]
972: dupcomm has the same number of processes as pc->comm, and its duprank maps the
973: processes of each subcomm contiguously into a 1d array:
974: duprank: [0] [1] [2] [3]
975: rank: [0] [3] [1] [2]
976: subcomm[0] subcomm[1] subcomm[2]
977: ----------------------------------------------------------------------------------------*/
979: /* get the size of each subcommunicator */
980: PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);
981: np_subcomm = size/nsubcomm;
982: nleftover = size - nsubcomm*np_subcomm;
983: for (i=0; i<nsubcomm; i++){
984: subsize[i] = np_subcomm;
985: if (i<nleftover) subsize[i]++;
986: }
988: /* find color for this proc */
989: color = rank%nsubcomm;
990: subrank = rank/nsubcomm;
992: MPI_Comm_split(pc->comm,color,subrank,&subcomm);
993: red->subcomm = subcomm;
994: red->color = color;
995: red->nsubcomm = nsubcomm;
997: j = 0; duprank = 0;
998: for (i=0; i<nsubcomm; i++){
999: if (j == color){
1000: duprank += subrank;
1001: break;
1002: }
1003: duprank += subsize[i]; j++;
1004: }
1005:
1006: /* create dupcomm with the same size as comm, but with rank duprank, which maps the subcomms contiguously into dupcomm */
1007: MPI_Comm_split(pc->comm,0,duprank,&dupcomm);
1008: red->dupcomm = dupcomm;
1009: PetscFree(subsize);
1011: /* create the PC that each subcommunicator has its own copy of */
1012: PCCreate(subcomm,&red->pc);
1013: PCSetType(red->pc,PCLU);
1014: PCGetOptionsPrefix(pc,&prefix);
1015: PCSetOptionsPrefix(red->pc,prefix);
1016: PCAppendOptionsPrefix(red->pc,"redundant_");
1018: pc->ops->apply = PCApply_Redundant;
1019: pc->ops->applytranspose = 0;
1020: pc->ops->setup = PCSetUp_Redundant;
1021: pc->ops->destroy = PCDestroy_Redundant;
1022: pc->ops->setfromoptions = PCSetFromOptions_Redundant;
1023: pc->ops->setuponblocks = 0;
1024: pc->ops->view = PCView_Redundant;
1025: pc->ops->applyrichardson = 0;
1027: pc->data = (void*)red;
1029: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
1030: PCRedundantSetScatter_Redundant);
1031: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
1032: PCRedundantGetPC_Redundant);
1033: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
1034: PCRedundantGetOperators_Redundant);
1036: return(0);
1037: }