Actual source code: openmp.c

#define PETSCKSP_DLL

#include "private/pcimpl.h"
#include "petscksp.h"

typedef struct {
  PetscInt   n;
  MPI_Comm   comm;                 /* local world used by this preconditioner */
  PC         pc;                   /* actual preconditioner used across local world */
  Mat        mat;                  /* matrix in local world */
  Mat        gmat;                 /* matrix known only to process 0 in the local world */
  Vec        x,y,xdummy,ydummy;
  VecScatter scatter;
} PC_OpenMP;
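/*
    How the pieces fit together (illustrative sketch, not part of the build): the context
    above is allocated with PetscOpenMPNew() so that, as I understand it, every process in
    the local world red->comm holds its own copy. Each collective operation below is then
    carried out by handing a callback plus this context to PetscOpenMPRun(), e.g.

       PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);

    Only the master process ever sees the outer PC object; the worker processes see only red.
*/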


/*
    Would like to have this simply call PCView() on the inner PC. The problem is
  that the outer comm does not exist inside the local world, so that cannot be done.
  Instead, handle the special case where the viewer is stdout by constructing a new
  viewer just for this call.
*/

static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscViewer    viewer;

  PetscViewerASCIIGetStdout(comm,&viewer);
  PetscViewerASCIIPushTab(viewer);         /* this is bogus in general */
  PCView(red->pc,viewer);
  PetscViewerASCIIPopTab(viewer);
  return(0);
}

static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;
  PetscMPIInt    size;
  PetscTruth     iascii;

  MPI_Comm_size(red->comm,&size);
  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  Size of solver nodes %d\n",size);
    PetscViewerASCIIPrintf(viewer,"  Parallel sub-solver given next\n");
    /* should only make the next call if the viewer is associated with stdout */
    PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
  }
  return(0);
}

#include "include/private/matimpl.h"
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/aij/seq/aij.h"

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(); horrible lack of reuse. There should be a routine of this kind
    for each matrix type. (A usage sketch follows the routine.)

    Only for square matrices.
*/
static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  PetscScalar    *gmataa;

  CHKMEMQ;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  if (!rank) {
    PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",gmat->type_name);
  }
  MatCreate(comm,&mat);
  MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(mat,MATAIJ);
  PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
  PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
  MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
  PetscObjectGetNewTag((PetscObject)mat,&tag);
  if (!rank) {
    gmata = (Mat_SeqAIJ*) gmat->data;
    /* send row lengths to all processors */
    for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
    for (i=1; i<size; i++) {
      MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
    }
    /* determine the number of diagonal and off-diagonal entries in each local row */
    PetscMemzero(olens,m*sizeof(PetscInt));
    jj = 0;
    for (i=0; i<m; i++) {
      for (j=0; j<dlens[i]; j++) {
        if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
        jj++;
      }
    }
    /* send column indices to the other processes */
    for (i=1; i<size; i++) {
      nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
      MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
      MPI_Send(gmata->j + gmata->i[rowners[i]],gmata->i[rowners[i+1]]-gmata->i[rowners[i]],MPIU_INT,i,tag,comm);
    }
    /* send numerical values to the other processes */
    for (i=1; i<size; i++) {
      nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
      MPI_Send(gmata->a + gmata->i[rowners[i]],gmata->i[rowners[i+1]]-gmata->i[rowners[i]],MPIU_SCALAR,i,tag,comm);
    }
    gmataa = gmata->a;
    gmataj = gmata->j;
  } else {
    /* receive row lengths */
    MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
    /* receive column indices */
    MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
    PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
    MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
    /* determine the number of diagonal and off-diagonal entries in each local row */
    PetscMemzero(olens,m*sizeof(PetscInt));
    jj = 0;
    for (i=0; i<m; i++) {
      for (j=0; j<dlens[i]; j++) {
        if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
        jj++;
      }
    }
    /* receive numerical values */
    PetscMemzero(gmataa,nz*sizeof(PetscScalar));
    MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
  }
  /* set preallocation */
  for (i=0; i<m; i++) {
    dlens[i] -= olens[i];
  }
  MatSeqAIJSetPreallocation(mat,0,dlens);
  MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);

  for (i=0; i<m; i++) {
    dlens[i] += olens[i];
  }
  cnt  = 0;
  for (i=0; i<m; i++) {
    row  = rstart + i;
    MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
    cnt += dlens[i];
  }
  if (rank) {
    PetscFree2(gmataa,gmataj);
  }
  PetscFree2(dlens,olens);
  PetscFree(rowners);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  CHKMEMQ;
  *inmat = mat;
  return(0);
}
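/*
    Usage sketch for MatDistribute_MPIAIJ() (it simply mirrors the call made in
    PCSetUp_OpenMP_MP() below): gmat needs to be a valid MATSEQAIJ matrix only on
    process 0 of comm, and every process supplies the number of rows m it is to own,
    here taken from the layout of a parallel work vector:

       Vec      x;
       Mat      mat;
       PetscInt m;
       VecCreateMPI(comm,PETSC_DECIDE,n,&x);      n = global number of rows
       VecGetLocalSize(x,&m);
       MatDistribute_MPIAIJ(comm,gmat,m,&mat);    gmat is dereferenced only on rank 0
*/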

static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscInt       m;

  /* setup vector communication */
  MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
  VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
  VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
  VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
  VecDuplicate(red->xdummy,&red->ydummy);

  /* copy matrix out onto processes */
  VecGetLocalSize(red->x,&m);
  MatDistribute_MPIAIJ(comm,red->gmat,m,&red->mat);

  /* create the solver */
  PCCreate(comm,&red->pc);
  PCSetOptionsPrefix(red->pc,"openmp_"); /* should actually append with global pc prefix */
  PCSetType(red->pc,PCKSP);
  PCSetOperators(red->pc,red->mat,red->mat,DIFFERENT_NONZERO_PATTERN);
  PCSetFromOptions(red->pc);
  return(0);
}
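/*
    Because the inner preconditioner is created with the prefix "openmp_" and the type
    PCKSP, the solver that actually runs across the local world is configured entirely
    from the options database; e.g. (option names taken from the PCOPENMP manual page
    below):

       -openmp_ksp_ksp_type cg
       -openmp_ksp_pc_type hypre -openmp_ksp_pc_hypre_type boomeramg
*/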

static PetscErrorCode PCSetUp_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  red->gmat = pc->mat;
  MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
  PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
  /* the master does not need the dummy sequential vectors created in PCSetUp_OpenMP_MP();
     PCApply_OpenMP() points xdummy/ydummy at the caller's vectors instead */
  VecDestroy(red->xdummy);
  VecDestroy(red->ydummy);
  return(0);
}

static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;

  /* scatter the input vector (held on process 0) out to the parallel layout */
  VecScatterBegin(red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE,red->scatter);
  VecScatterEnd(red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE,red->scatter);

  /* apply the parallel inner preconditioner */
  PCApply(red->pc,red->x,red->y);

  /* gather the result back onto process 0 */
  VecScatterBegin(red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);
  VecScatterEnd(red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);
  return(0);
}

static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  /* on the master the dummy vectors are simply the caller's sequential vectors */
  red->xdummy = x;
  red->ydummy = y;
  PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
  return(0);
}
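/*
    Data flow of one application (sketch, written with the pre-3.0 VecScatterBegin()
    calling sequence used throughout this file; xpar/ypar stand for red->x/red->y and
    xseq/yseq for the sequential side). The scatter built by VecScatterCreateToZero()
    in PCSetUp_OpenMP_MP() connects the parallel vectors to sequential vectors that
    hold all entries on process 0 (and none elsewhere), so

       VecScatterBegin(xseq,xpar,INSERT_VALUES,SCATTER_REVERSE,scatter);   process 0 -> all
       VecScatterEnd(xseq,xpar,INSERT_VALUES,SCATTER_REVERSE,scatter);
       ... parallel PCApply() ...
       VecScatterBegin(ypar,yseq,INSERT_VALUES,SCATTER_FORWARD,scatter);   all -> process 0
       VecScatterEnd(ypar,yseq,INSERT_VALUES,SCATTER_FORWARD,scatter);

    where the sequential vectors are the caller's x and y on the master (set in
    PCApply_OpenMP() above) and red->xdummy/red->ydummy on the workers.
*/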

static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscMPIInt    rank;

  if (red->scatter) {VecScatterDestroy(red->scatter);}
  if (red->x) {VecDestroy(red->x);}
  if (red->y) {VecDestroy(red->y);}
  if (red->pc) {PCDestroy(red->pc);}    /* inner solver created in PCSetUp_OpenMP_MP() */
  if (red->mat) {MatDestroy(red->mat);} /* distributed copy of the matrix */
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    if (red->xdummy) {VecDestroy(red->xdummy);}
    if (red->ydummy) {VecDestroy(red->ydummy);}
  }
  return(0);
}

static PetscErrorCode PCDestroy_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
  PetscOpenMPFree(red->comm,red);
  return(0);
}

static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
{
  return(0);
}


/* -------------------------------------------------------------------------------------*/
/*MC
     PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes

$     This is usually run with -pc_type openmp -ksp_type gmres -openmp_pc_type ksp; the inner
$     solver options are then set with -openmp_ksp_ksp_... and -openmp_ksp_pc_.... For example,
$     -openmp_ksp_ksp_type cg selects CG as the inner Krylov method, -openmp_ksp_ksp_monitor
$     monitors it, and -openmp_ksp_pc_type hypre -openmp_ksp_pc_hypre_type boomeramg selects
$     BoomerAMG as the inner preconditioner.

       Always run with -ksp_view (or -snes_view) to see what solver is actually being used.

       Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
      options database.

   Level: intermediate

   See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types)

M*/
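/*
    Minimal calling sketch (assumptions: the local worlds have already been set up with
    PetscOpenMPMerge() or PetscOpenMPSpawn(), and A, b, x are a sequential matrix and
    vectors on the master process; error checking omitted):

       KSP ksp;
       PC  pc;
       KSPCreate(PETSC_COMM_WORLD,&ksp);      the communicator seen here must have one process
       KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
       KSPGetPC(ksp,&pc);
       PCSetType(pc,PCOPENMP);                or select it with -pc_type openmp
       KSPSetFromOptions(ksp);                picks up the -openmp_... options above
       KSPSolve(ksp,b,x);
*/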

PetscErrorCode  PCCreate_OpenMP(PC pc)
{
  PC_OpenMP      *red;
  PetscMPIInt    size;

  MPI_Comm_size(pc->comm,&size);
  if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
  PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,sizeof(PC_OpenMP),(void**)&red);
  red->comm = PETSC_COMM_LOCAL_WORLD;
  pc->data  = (void*) red;

  pc->ops->apply          = PCApply_OpenMP;
  pc->ops->destroy        = PCDestroy_OpenMP;
  pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
  pc->ops->setup          = PCSetUp_OpenMP;
  pc->ops->view           = PCView_OpenMP;
  return(0);
}