Actual source code: openmp.c
#define PETSCKSP_DLL

#include "private/pcimpl.h"
#include "petscksp.h"
typedef struct {
  PetscInt   n;
  MPI_Comm   comm;                   /* local world used by this preconditioner */
  PC         pc;                     /* actual preconditioner used across local world */
  Mat        mat;                    /* matrix in local world */
  Mat        gmat;                   /* matrix known only to process 0 in the local world */
  Vec        x,y,xdummy,ydummy;
  VecScatter scatter;
} PC_OpenMP;
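
/*
   Overall pattern, as used throughout this file: the outer PC lives on a single
   process, and each operation forwards its context to the other processes of
   red->comm with PetscOpenMPRun(comm,func,ctx); the corresponding _MP routine
   below is the body that runs collectively on that communicator.
*/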
/*
   Would like to have this simply call PCView() on the inner PC. The problem is
   that the outer comm does not live on the inner processes, so that cannot be done
   directly. Instead, handle the special case where the viewer is stdout by
   constructing a new viewer just for this call.
*/
static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP   *red = (PC_OpenMP*)ctx;
  PetscViewer viewer;

  PetscViewerASCIIGetStdout(comm,&viewer);
  PetscViewerASCIIPushTab(viewer);         /* this is bogus in general */
  PCView(red->pc,viewer);
  PetscViewerASCIIPopTab(viewer);
  return(0);
}
static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
{
  PC_OpenMP   *red = (PC_OpenMP*)pc->data;
  PetscMPIInt size;
  PetscTruth  iascii;

  MPI_Comm_size(red->comm,&size);
  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  Size of solver nodes %d\n",size);
    PetscViewerASCIIPrintf(viewer,"  Parallel sub-solver given next\n");
    /* should only make the next call if the viewer is associated with stdout */
    PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
  }
  return(0);
}
#include "include/private/matimpl.h"
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/aij/seq/aij.h"
/*
   Distributes a SeqAIJ matrix across a set of processes. Code stolen from
   MatLoad_MPIAIJ(); a horrible lack of reuse. There should be a routine for each
   matrix type. (A small worked example of the row-ownership computation follows
   the routine below.)

   Only for square matrices.
*/
static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,Mat *inmat)
{
  PetscMPIInt rank,size;
  PetscInt    *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row;
  Mat         mat;
  Mat_SeqAIJ  *gmata;
  PetscMPIInt tag;
  MPI_Status  status;
  PetscTruth  aij;
  PetscScalar *gmataa;

  CHKMEMQ;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  if (!rank) {
    PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",gmat->type_name);
  }
  MatCreate(comm,&mat);
  MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(mat,MATAIJ);
  PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
  PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
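  /* gather each process's local row count, then turn the counts into row-ownership offsets */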
  MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
  PetscObjectGetNewTag((PetscObject)mat,&tag);
  if (!rank) {
    gmata = (Mat_SeqAIJ*) gmat->data;
    /* send row lengths to all processes */
    for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
    for (i=1; i<size; i++) {
      MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
    }
    /* determine the number of diagonal and off-diagonal entries in each row */
    PetscMemzero(olens,m*sizeof(PetscInt));
    jj = 0;
    for (i=0; i<m; i++) {
      for (j=0; j<dlens[i]; j++) {
        if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
        jj++;
      }
    }
    /* send column indices to the other processes */
    for (i=1; i<size; i++) {
      nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
      MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
      MPI_Send(gmata->j + gmata->i[rowners[i]],gmata->i[rowners[i+1]]-gmata->i[rowners[i]],MPIU_INT,i,tag,comm);
    }
    /* send numerical values to the other processes */
    for (i=1; i<size; i++) {
      nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
      MPI_Send(gmata->a + gmata->i[rowners[i]],gmata->i[rowners[i+1]]-gmata->i[rowners[i]],MPIU_SCALAR,i,tag,comm);
    }
    gmataa = gmata->a;
    gmataj = gmata->j;

  } else {
    /* receive row lengths */
    MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
    /* receive column indices */
    MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
    PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
    MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
    /* determine the number of diagonal and off-diagonal entries in each row */
    PetscMemzero(olens,m*sizeof(PetscInt));
    jj = 0;
    for (i=0; i<m; i++) {
      for (j=0; j<dlens[i]; j++) {
        if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
        jj++;
      }
    }
    /* receive numerical values */
    PetscMemzero(gmataa,nz*sizeof(PetscScalar));
    MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
  }
  /* set preallocation; dlens temporarily holds only the diagonal-block counts */
  for (i=0; i<m; i++) {
    dlens[i] -= olens[i];
  }
  MatSeqAIJSetPreallocation(mat,0,dlens);
  MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);

  for (i=0; i<m; i++) {
    dlens[i] += olens[i];
  }
  cnt = 0;
  for (i=0; i<m; i++) {
    row = rstart + i;
    MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
    cnt += dlens[i];
  }
  if (rank) {
    PetscFree2(gmataa,gmataj);
  }
  PetscFree2(dlens,olens);
  PetscFree(rowners);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  CHKMEMQ;
  *inmat = mat;
  return(0);
}
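
/*
   Illustrative example of the row-ownership computation in MatDistribute_MPIAIJ()
   above (numbers chosen arbitrarily): with size = 3 and local row counts
   m = 3, 2, 4 on processes 0, 1, 2, MPI_Allgather() fills rowners[1..3] with
   {3,2,4}; after the running sum rowners = {0,3,5,9}, so process 0 owns global
   rows [0,3), process 1 owns [3,5), and process 2 owns [5,9). A column index
   outside [rstart,rend) is counted in olens (the off-diagonal block).
*/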
static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP *red = (PC_OpenMP*)ctx;
  PetscInt  m;

  /* set up vector communication */
  MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
  VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
  VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
  VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
  VecDuplicate(red->xdummy,&red->ydummy);

  /* distribute the matrix across the processes of this communicator */
  VecGetLocalSize(red->x,&m);
  MatDistribute_MPIAIJ(comm,red->gmat,m,&red->mat);

  /* create the solver */
  PCCreate(comm,&red->pc);
  PCSetOptionsPrefix(red->pc,"openmp_");  /* should actually append to the global pc prefix */
  PCSetType(red->pc,PCKSP);
  PCSetOperators(red->pc,red->mat,red->mat,DIFFERENT_NONZERO_PATTERN);
  PCSetFromOptions(red->pc);
  return(0);
}
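
/*
   Note: VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy) above creates a
   scatter whose forward direction gathers the parallel vector onto a sequential
   vector that holds the full vector on process 0 and no entries elsewhere; xdummy
   and ydummy are those sequential vectors. A minimal sketch of a forward gather
   with this scatter, using the old-style VecScatterBegin/End argument order used
   throughout this file, would be

     VecScatterBegin(red->x,red->xdummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);
     VecScatterEnd(red->x,red->xdummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);

   PCApply_OpenMP_MP() below uses the reverse direction to spread process 0's data out.
*/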
static PetscErrorCode PCSetUp_OpenMP(PC pc)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  red->gmat = pc->mat;
  MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
  PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
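  /* the sequential dummy vectors created on this (master) process are not kept;
     PCApply_OpenMP() points xdummy/ydummy at the caller's vectors instead */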
  VecDestroy(red->xdummy);
  VecDestroy(red->ydummy);
  return(0);
}
static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP *red = (PC_OpenMP*)ctx;
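
  /* xdummy holds the sequential input on process 0; the reverse scatter spreads it into the parallel vector x */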
  VecScatterBegin(red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE,red->scatter);
  VecScatterEnd(red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE,red->scatter);

  PCApply(red->pc,red->x,red->y);
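
  /* the forward scatter collects the parallel result y back into the sequential ydummy on process 0 */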
  VecScatterBegin(red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);
  VecScatterEnd(red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD,red->scatter);
  return(0);
}
static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  red->xdummy = x;
  red->ydummy = y;
  PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
  return(0);
}
static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP   *red = (PC_OpenMP*)ctx;
  PetscMPIInt rank;

  if (red->scatter) {VecScatterDestroy(red->scatter);}
  if (red->x)       {VecDestroy(red->x);}
  if (red->y)       {VecDestroy(red->y);}
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    if (red->xdummy) {VecDestroy(red->xdummy);}
    if (red->ydummy) {VecDestroy(red->ydummy);}
  }
  return(0);
}
static PetscErrorCode PCDestroy_OpenMP(PC pc)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
  PetscOpenMPFree(red->comm,red);
  return(0);
}
static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
{
  return(0);
}
/* -------------------------------------------------------------------------------------*/
/*MC
     PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes

$     This will usually be run with -pc_type openmp -ksp_type gmres -openmp_pc_type ksp; the inner
$     solver options are then set with -openmp_ksp_ksp_... and -openmp_ksp_pc_..., for example
$     -openmp_ksp_ksp_type cg uses CG as the inner Krylov method, -openmp_ksp_ksp_monitor monitors it, and
$     -openmp_ksp_pc_type hypre -openmp_ksp_pc_hypre_type boomeramg selects BoomerAMG as the inner preconditioner.

       Always run with -ksp_view (or -snes_view) to see what solver is actually being used.

       Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
       options database.

   Level: intermediate

   See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types)

M*/
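
/*
   A minimal usage sketch (not part of this file; assumes a standard KSP driver in
   which the Mat A and Vecs b, x have already been created on a one-process
   PETSC_COMM_WORLD):

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCOPENMP);        /* or select it at run time with -pc_type openmp */
     KSPSetFromOptions(ksp);        /* picks up the -openmp_... options described above */
     KSPSolve(ksp,b,x);
*/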
PetscErrorCode PCCreate_OpenMP(PC pc)
{
  PC_OpenMP   *red;
  PetscMPIInt size;

  MPI_Comm_size(pc->comm,&size);
  if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
  PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,sizeof(PC_OpenMP),(void**)&red);
  red->comm = PETSC_COMM_LOCAL_WORLD;
  pc->data  = (void*) red;

  pc->ops->apply          = PCApply_OpenMP;
  pc->ops->destroy        = PCDestroy_OpenMP;
  pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
  pc->ops->setup          = PCSetUp_OpenMP;
  pc->ops->view           = PCView_OpenMP;
  return(0);
}