Actual source code: mpinit.c
#include "petsc.h"
#include "petscsys.h"

static MPI_Comm   saved_PETSC_COMM_WORLD = 0;
MPI_Comm          PETSC_COMM_LOCAL_WORLD = 0;   /* comm for a single node (local set of processes) */
static PetscTruth used_PetscOpenMP       = PETSC_FALSE;

#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
/*@C
   PetscOpenMPSpawn - Initialize additional processes to be used as "worker" processes. This is not generally
     called by users. One should use -openmp_spawn_size <n> to indicate that you wish to have n-1 new MPI
     processes spawned for each current process.

   Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

   Input Parameter:
.  nodesize - size of each compute node that will share processors

   Options Database:
.  -openmp_spawn_size nodesize

   Notes: This is only supported on systems with an MPI 2 implementation that includes the MPI_Comm_spawn() routine.

$    Comparison of two approaches for OpenMP usage (MPI started with N processes)
$
$    -openmp_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
$       and n-1 worker processes (used by PETSc) for each application node.
$       You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -openmp_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$       (used by PETSc).
$       You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpirun -np 2 ./ex1 -openmp_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpirun -np 6 ./ex1 -openmp_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER OpenMP code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p cpus), the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCOPENMP for a PETSc preconditioner that can use this functionality
$

   Level: developer

   Concepts: OpenMP

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscOpenMPFinalize(), PetscInitialize(), PetscOpenMPMerge()

@*/
PetscErrorCode PetscOpenMPSpawn(PetscMPIInt nodesize)
{
  PetscMPIInt size;
  MPI_Comm    parent,children;

  MPI_Comm_get_parent(&parent);
  if (parent == MPI_COMM_NULL) {  /* original processes started by the user */
    char programname[PETSC_MAX_PATH_LEN];
    char **argv;

    PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);
    PetscGetArguments(&argv);
    MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
    PetscFreeArguments(argv);
    MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);

    MPI_Comm_size(PETSC_COMM_WORLD,&size);
    PetscInfo2(0,"PETSc OpenMP successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);
    saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;
    used_PetscOpenMP       = PETSC_TRUE;
  } else { /* worker nodes that get spawned */
    MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);
    PetscOpenMPHandle(PETSC_COMM_LOCAL_WORLD);
    used_PetscOpenMP = PETSC_FALSE; /* so that PetscOpenMPFinalize() will not attempt a broadcast from this process */
    PetscEnd(); /* cannot continue into user code */
  }
  return(0);
}
#endif
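
/*
   Usage sketch (illustration only; the guard macro below is never defined, so this block is compiled out).
   An application does not call PetscOpenMPSpawn() directly: the assumption made here is that
   PetscInitialize() performs the spawn when the program is launched with, e.g.,

       petscmpirun -np 2 ./ex1 -openmp_spawn_size 3

   The spawned worker processes enter PetscOpenMPHandle() and then PetscEnd() inside PetscOpenMPSpawn()
   above, so only the original N processes ever reach the application code. The program name "ex1" and
   the guard macro are hypothetical.
*/
#if defined(PETSC_SKETCH_OPENMP_SPAWN)
int main(int argc,char **argv)
{
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,(char*)0,(char*)0);CHKERRQ(ierr); /* workers never return from here */
  /* ... application code, run only on the N processes the user launched ... */
  ierr = PetscFinalize();CHKERRQ(ierr);                                /* shuts down the worker processes */
  return 0;
}
#endif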
/*@C
   PetscOpenMPMerge - Initializes PETSc and MPI to work with OpenMP. This is not usually called
     by the user. One should use -openmp_merge_size <n> to indicate the node size of the merged
     communicator.

   Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

   Input Parameter:
.  nodesize - size of each compute node that will share processors

   Options Database:
.  -openmp_merge_size nodesize

   Level: developer

$    Comparison of two approaches for OpenMP usage (MPI started with N processes)
$
$    -openmp_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
$       and n-1 worker processes (used by PETSc) for each application node.
$       You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -openmp_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$       (used by PETSc).
$       You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpirun -np 2 ./ex1 -openmp_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpirun -np 6 ./ex1 -openmp_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER OpenMP code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p cpus), the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCOPENMP for a PETSc preconditioner that can use this functionality
$

   Concepts: OpenMP

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscOpenMPFinalize(), PetscInitialize()

@*/
PetscErrorCode PetscOpenMPMerge(PetscMPIInt nodesize)
{
  PetscMPIInt size,rank,*ranks,i;
  MPI_Group   group,newgroup;

  saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;

  MPI_Comm_size(saved_PETSC_COMM_WORLD,&size);
  if (size % nodesize) SETERRQ2(PETSC_ERR_ARG_SIZ,"Total number of processes %d is not divisible by number of processes per node %d",size,nodesize);
  MPI_Comm_rank(saved_PETSC_COMM_WORLD,&rank);

  /* create two communicators
      *) one that contains the first process from each node: 0,nodesize,2*nodesize,...
      *) one that contains all processes in a node: (0,1,2,...,nodesize-1), (nodesize,nodesize+1,...,2*nodesize-1), ...
  */
  MPI_Comm_group(saved_PETSC_COMM_WORLD,&group);
  PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
  for (i=0; i<(size/nodesize); i++) ranks[i] = i*nodesize;
  MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
  PetscFree(ranks);
  MPI_Comm_create(saved_PETSC_COMM_WORLD,newgroup,&PETSC_COMM_WORLD);
  if (rank % nodesize) PETSC_COMM_WORLD = 0; /* mark invalid processes for easy debugging */
  MPI_Group_free(&group);
  MPI_Group_free(&newgroup);

  MPI_Comm_split(saved_PETSC_COMM_WORLD,rank/nodesize,rank % nodesize,&PETSC_COMM_LOCAL_WORLD);

  PetscInfo2(0,"PETSc OpenMP successfully started: number of nodes = %d node size = %d\n",size/nodesize,nodesize);
  PetscInfo1(0,"PETSc OpenMP process %sactive\n",(rank % nodesize) ? "in" : "");

  used_PetscOpenMP = PETSC_TRUE;
  /*
     All processes not involved in the user application code wait here
  */
  if (!PETSC_COMM_WORLD) {
    PetscOpenMPHandle(PETSC_COMM_LOCAL_WORLD);
    PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
    used_PetscOpenMP = PETSC_FALSE; /* so that PetscOpenMPFinalize() will not attempt a broadcast from this process */
    PetscEnd(); /* cannot continue into user code */
  }
  return(0);
}
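
/*
   Usage sketch (illustration only; the guard macro below is never defined, so this block is compiled out).
   As with the spawn variant, the assumption made here is that PetscInitialize() calls
   PetscOpenMPMerge(n) when the program is launched with, e.g.,

       petscmpirun -np 6 ./ex1 -openmp_merge_size 3

   After the merge, PETSC_COMM_WORLD contains only the first process of each node; the remaining
   processes are parked in PetscOpenMPHandle() above and never reach the code below. The program
   name "ex1" and the guard macro are hypothetical.
*/
#if defined(PETSC_SKETCH_OPENMP_MERGE)
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    napp;

  ierr = PetscInitialize(&argc,&argv,(char*)0,(char*)0);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&napp);CHKERRQ(ierr); /* N/n application processes */
  /* ... application (user OpenMP threaded) code runs here ... */
  ierr = PetscFinalize();CHKERRQ(ierr);                       /* broadcasts the exit command to the workers */
  return 0;
}
#endif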
/*@C
   PetscOpenMPFinalize - Finalizes PETSc and MPI to work with OpenMP. Called by PetscFinalize();
     cannot be called by the user.

   Collective on the entire system

   Level: developer

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscOpenMPMerge()

@*/
PetscErrorCode PetscOpenMPFinalize(void)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 3;

  if (!used_PetscOpenMP) return(0);
  MPI_Bcast(&command,1,MPIU_INT,0,PETSC_COMM_LOCAL_WORLD);
  PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
  return(ierr);
}
static PetscInt numberobjects = 0;
static void     *objects[100];
/*@C
   PetscOpenMPHandle - Receives commands from the master node and processes them until it is
     told to exit (by PetscOpenMPFinalize())

   Collective on MPI_Comm

   Level: developer

.seealso: PetscOpenMPMerge()

@*/
PetscErrorCode PetscOpenMPHandle(MPI_Comm comm)
{
  PetscInt   command;
  PetscTruth exitwhileloop = PETSC_FALSE;
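
  /*
     Command protocol, broadcast from process 0 of comm:
       0 - allocate and zero a block of memory   (sent by PetscOpenMPNew())
       1 - free a previously allocated block     (sent by PetscOpenMPFree())
       2 - run a function on an allocated block  (sent by PetscOpenMPRun())
       3 - leave this handler loop               (sent by PetscOpenMPFinalize())
  */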
  while (!exitwhileloop) {
    MPI_Bcast(&command,1,MPIU_INT,0,comm);
    switch (command) {
    case 0: {
      size_t n;
      void   *ptr;
      MPI_Bcast(&n,1,MPI_INT,0,comm); /* may be the wrong MPI datatype since n is a size_t */
      /* cannot use PetscNew() because it requires a struct argument */
      PetscMalloc(n,&ptr);
      PetscMemzero(ptr,n);
      objects[numberobjects++] = ptr;
      break;
    }
    case 1: {
      PetscInt i;
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(objects[i]);
      objects[i] = 0;
      break;
    }
    case 2: {
      PetscInt       i;
      PetscErrorCode (*f)(MPI_Comm,void*);
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_INT,0,comm);
      (*f)(comm,objects[i]);
      break;
    }
    case 3: {
      exitwhileloop = PETSC_TRUE;
      break;
    }
    default:
      SETERRQ1(PETSC_ERR_PLIB,"Unknown OpenMP command %D",command);
    }
  }
  return(0);
}
/*@C
   PetscOpenMPNew - Creates a "C struct" on all nodes of an OpenMP communicator

   Collective on MPI_Comm

   Level: developer

.seealso: PetscOpenMPMerge()

@*/
PetscErrorCode PetscOpenMPNew(MPI_Comm comm,size_t n,void **ptr)
{
  PetscInt command = 0;

  if (!used_PetscOpenMP) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  MPI_Bcast(&n,1,MPI_INT,0,comm); /* may be the wrong MPI datatype since n is a size_t */
  /* cannot use PetscNew() because it requires a struct argument */
  PetscMalloc(n,ptr);
  PetscMemzero(*ptr,n);
  objects[numberobjects++] = *ptr;
  return(0);
}
/*@C
   PetscOpenMPFree - Frees a "C struct" on all nodes of an OpenMP communicator

   Collective on MPI_Comm

   Level: developer

.seealso: PetscOpenMPMerge(), PetscOpenMPNew()

@*/
PetscErrorCode PetscOpenMPFree(MPI_Comm comm,void *ptr)
{
  PetscInt command = 1,i;

  if (!used_PetscOpenMP) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(ptr);
      objects[i] = 0;
      return(0);
    }
  }
  SETERRQ(PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscOpenMPNew()");
  return(0); /* never reached; SETERRQ() above returns an error code */
}
/*@C
   PetscOpenMPRun - Runs a function on all the processes of a node

   Collective on MPI_Comm

   Level: developer

.seealso: PetscOpenMPMerge(), PetscOpenMPNew(), PetscOpenMPFree()

@*/
PetscErrorCode PetscOpenMPRun(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void *),void *ptr)
{
  PetscInt command = 2,i;

  if (!used_PetscOpenMP) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_INT,0,comm);
      (*f)(comm,ptr);
      return(0);
    }
  }
  SETERRQ(PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscOpenMPNew()");
  return(0); /* never reached; SETERRQ() above returns an error code */
}