Actual source code: ex2.c
2: static char help[] = "Reads a simple unstructured grid from a file. Partitions it,\n\
3: and distributes the grid data accordingly\n\n";
5: /*T
6: Concepts: Mat^partitioning a matrix;
7: Processors: n
8: T*/
10: /*
11: Updates of this example MAY be found at
12: http://www.mcs.anl.gov/petsc/src/dm/ao/examples/tutorials/ex2.c
14: This is a very basic, even crude, example of managing an unstructured
15: grid in parallel.
17: This particular code is for a Galerkin-style finite element method.
19: After the calls below, each processor will have
20: 1) a list of elements it "owns"; for each "owned" element it will have the global
21: numbering of the three vertices; stored in gdata->ele;
22: 2) a list of vertices it "owns". For each owned vertex it will have the x and y
23: coordinates; stored in gdata->vert
25: It will not have
26: 1) a list of ghost elements (since they are not needed for traditional
27: Galerkin-style finite element methods). For various finite volume methods
28: you may need the ghost element lists; these may be generated using the
29: element neighbor information given in the file database.
31: To compute the local element stiffness matrix and load vector, each processor
32: will need the vertex coordinates for all of the vertices on the locally
33: "owned" elements. This data could be obtained by doing the appropriate vector
34: scatter on the data stored in gdata->vert; this example does not demonstrate that step, but an illustrative sketch is included after DataMoveVertices() below.
36: Clearly writing a complete parallel unstructured grid code with PETSc is still
37: a good deal of work and requires a lot of application level coding. BUT, at least
38: PETSc can manage all of the nonlinear and linear solvers (including matrix assembly
39: etc.), which allows the programmer to concentrate his or her efforts on managing
40: the unstructured grid. The PETSc team is developing additional library objects
41: to help manage parallel unstructured grid computations. Unfortunately we have
42: not had time to complete these yet, so the application programmer still must
43: manage much of the parallel grid manipulation as indicated below.
45: */
47: /*
48: Include "petscmat.h" so that we can use matrices;
49: it automatically includes:
50: petsc.h - base PETSc routines petscvec.h - vectors
51: petscsys.h - system routines petscmat.h - matrices
52: petscis.h - index sets petscviewer.h - viewers
54: Including "petscao.h" allows use of the AO (application ordering) commands,
55: used below for renumbering the vertex numbers after the partitioning.
57: Include "petscbt.h" for managing logical bit arrays that are used to
58: conserve space. Note that the code uses order-N bit arrays on each
59: processor, so it is not theoretically scalable; however, even with 64 million
60: vertices the bit array temporarily needs only 8 megabytes of memory,
61: so one can still do very large problems with this approach,
62: especially since the bit arrays are freed before the vectors and matrices are
63: created.
64: */
65: #include "petscmat.h"
66: #include "petscao.h"
67: #include "petscbt.h"
69: /*
70: This is the user-defined grid data context
71: */
72: typedef struct {
73: PetscInt n_vert,n_ele;
74: PetscInt mlocal_vert,mlocal_ele;
75: PetscInt *ele;
76: PetscScalar *vert;
77: PetscInt *ia,*ja;
78: IS isnewproc;
79: PetscInt *localvert,nlocal; /* used to stash temporarily old global vertex number of new vertex */
80: } GridData;
82: /*
83: Variables on all processors:
84: n_vert - total number of vertices
85: mlocal_vert - number of vertices on this processor
86: vert - x,y coordinates of local vertices
88: n_ele - total number of elements
89: mlocal_ele - number of elements on this processor
90: ele - vertices of elements on this processor
92: ia, ja - adjacency graph of elements (for partitioning)
93:
94: Variables on processor 0 during data reading from file:
95: mmlocal_vert[i] - number of vertices on each processor
96: tmpvert - x,y coordinates of vertices on any processor (as read in)
98: mmlocal_ele[i] - number of elements on each processor
100: tmpia, tmpja - adjacency graph of elements for other processors
102: Notes:
103: The code below has a great deal of IO (print statements). This is to allow one to track
104: the renumbering and movement of data among processors. In an actual
105: production run, IO of this type would be deactivated.
107: To use the ParMETIS partitioner, run with the option -mat_partitioning_type parmetis;
108: otherwise the code defaults to the initial element partitioning induced when the data
109: is read in. (A sample run command is given after this comment block.)
111: To understand the parallel performance of this type of code, it is important
112: to profile the time spent in various events in the code; running with the
113: option -log_summary will indicate how much time is spent in the routines
114: below. Of course, for very small problems, such as the sample grid used here,
115: the profiling results are meaningless.
116: */
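
/*
   For example, a hypothetical run on 4 processes that selects ParMETIS and
   turns on profiling might look like (the executable name and process count
   are illustrative only):

       mpiexec -n 4 ./ex2 -mat_partitioning_type parmetis -log_summary
*/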
127: int main(int argc,char **args)
128: {
130: PetscEvent READ_EVENT,PARTITION_ELEMENT_EVENT,MOVE_ELEMENT_EVENT;
131: PetscEvent PARTITION_VERTEX_EVENT,MOVE_VERTEX_EVENT;
132: GridData gdata;
134: PetscInitialize(&argc,&args,(char *)0,help);
143: DataRead(&gdata);
146: DataPartitionElements(&gdata);
149: DataMoveElements(&gdata);
152: DataPartitionVertices(&gdata);
155: DataMoveVertices(&gdata);
157: DataDestroy(&gdata);
159: PetscFinalize();
160: return 0;
161: }
166: /*
167: Reads in the grid data from a file; each processor is naively
168: assigned a contiguous chunk of vertex and element data. Later the data
169: will be partitioned and moved to the appropriate processor.
170: */
171: PetscErrorCode DataRead(GridData *gdata)
172: {
173: PetscMPIInt rank,size;
174: PetscInt n_vert,*mmlocal_vert,mlocal_vert,i,*ia,*ja,cnt,j;
175: PetscInt mlocal_ele,*mmlocal_ele,*ele,*tmpele,n_ele,net,a1,a2,a3;
176: PetscInt *iatmp,*jatmp;
178: char msg[128];
179: PetscScalar *vert,*tmpvert;
180: MPI_Status status;
183: /*
184: Processor 0 opens the file, reads in chunks of data and sends a portion off to
185: each other processor.
187: Note: For a truly scalable IO portion of the code, one would store
188: the grid data in a binary file and use MPI-IO commands to have each
189: processor read in the parts that it needs (an illustrative sketch is
190: given after this routine). However, in most circumstances involving,
191: say, up to a million nodes and 100 processors the approach here is fine.
192: */
193: MPI_Comm_size(PETSC_COMM_WORLD,&size);
194: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
196: if (!rank) {
197: FILE *fd;
198: fd = fopen("usgdata","r"); if (!fd) SETERRQ(1,"Cannot open grid file");
200: /* read in number of vertices */
201: fgets(msg,128,fd);
202: printf("File msg:%s",msg);
203: fscanf(fd,"Number Vertices = %d\n",&n_vert);
204: printf("Number of grid vertices %d\n",n_vert);
206: /* broadcast number of vertices to all processors */
207: MPI_Bcast(&n_vert,1,MPI_INT,0,PETSC_COMM_WORLD);
208: mlocal_vert = n_vert/size + ((n_vert % size) > 0);
210: /*
211: Allocate enough room for the first processor to keep track of how many
212: vertices are assigned to each processor, splitting the vertices as
213: equally as possible among all processors.
214: */
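/*
   For example (an illustrative count, not taken from the grid file): with
   n_vert = 10 and size = 4 the loop below assigns 3, 3, 2 and 2 vertices
   to processors 0, 1, 2 and 3, respectively.
*/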
215: PetscMalloc(size*sizeof(PetscInt),&mmlocal_vert);
216: for (i=0; i<size; i++) {
217: mmlocal_vert[i] = n_vert/size + ((n_vert % size) > i);
218: printf("Processor %d assigned %d vertices\n",i,mmlocal_vert[i]);
219: }
221: /*
222: Read in vertices assigned to first processor
223: */
224: PetscMalloc(2*mmlocal_vert[0]*sizeof(PetscScalar),&vert);
225: printf("Vertices assigned to processor 0\n");
226: for (i=0; i<mlocal_vert; i++) {
227: fscanf(fd,"%d %lf %lf\n",&cnt,vert+2*i,vert+2*i+1);
228: printf("%d %g %g\n",cnt,PetscRealPart(vert[2*i]),PetscRealPart(vert[2*i+1]));
229: }
231: /*
232: Read in vertices for all the other processors
233: */
234: PetscMalloc(2*mmlocal_vert[0]*sizeof(PetscScalar),&tmpvert);
235: for (j=1; j<size; j++) {
236: printf("Vertices assigned to processor %d\n",j);
237: for (i=0; i<mmlocal_vert[j]; i++) {
238: fscanf(fd,"%d %lf %lf\n",&cnt,tmpvert+2*i,tmpvert+2*i+1);
239: printf("%d %g %g\n",cnt,tmpvert[2*i],tmpvert[2*i+1]);
240: }
241: MPI_Send(tmpvert,2*mmlocal_vert[j],MPIU_SCALAR,j,0,PETSC_COMM_WORLD);
242: }
243: PetscFree(tmpvert);
244: PetscFree(mmlocal_vert);
246: fscanf(fd,"Number Elements = %d\n",&n_ele);
247: printf("Number of grid elements %d\n",n_ele);
249: /*
250: Broadcast number of elements to all processors
251: */
252: MPI_Bcast(&n_ele,1,MPI_INT,0,PETSC_COMM_WORLD);
253: mlocal_ele = n_ele/size + ((n_ele % size) > 0);
255: /*
256: Allocate enough room for the first processor to keep track of how many
257: elements are assigned to each processor.
258: */
259: PetscMalloc(size*sizeof(PetscInt),&mmlocal_ele);
260: for (i=0; i<size; i++) {
261: mmlocal_ele[i] = n_ele/size + ((n_ele % size) > i);
262: printf("Processor %d assigned %d elements\n",i,mmlocal_ele[i]);
263: }
264:
265: /*
266: read in element information for the first processor
267: */
268: PetscMalloc(3*mmlocal_ele[0]*sizeof(PetscInt),&ele);
269: printf("Elements assigned to processor 0\n");
270: for (i=0; i<mlocal_ele; i++) {
271: fscanf(fd,"%d %d %d %d\n",&cnt,ele+3*i,ele+3*i+1,ele+3*i+2);
272: printf("%d %d %d %d\n",cnt,ele[3*i],ele[3*i+1],ele[3*i+2]);
273: }
275: /*
276: Read in elements for all the other processors
277: */
278: PetscMalloc(3*mmlocal_ele[0]*sizeof(PetscInt),&tmpele);
279: for (j=1; j<size; j++) {
280: printf("Elements assigned to processor %d\n",j);
281: for (i=0; i<mmlocal_ele[j]; i++) {
282: fscanf(fd,"%d %d %d %d\n",&cnt,tmpele+3*i,tmpele+3*i+1,tmpele+3*i+2);
283: printf("%d %d %d %d\n",cnt,tmpele[3*i],tmpele[3*i+1],tmpele[3*i+2]);
284: }
285: MPI_Send(tmpele,3*mmlocal_ele[j],MPI_INT,j,0,PETSC_COMM_WORLD);
286: }
287: PetscFree(tmpele);
289: /*
290: Read in element neighbors for processor 0
291: We don't know how many entries of ja[] to allocate, so we allocate
292: 3 times the number of local elements, which is the maximum it could be
293: */
294: PetscMalloc((mlocal_ele+1)*sizeof(PetscInt),&ia);
295: PetscMalloc((3*mlocal_ele+1)*sizeof(PetscInt),&ja);
296: net = 0;
297: ia[0] = 0;
298: printf("Element neighbors on processor 0\n");
299: fgets(msg,128,fd);
300: for (i=0; i<mlocal_ele; i++) {
301: fscanf(fd,"%d %d %d %d\n",&cnt,&a1,&a2,&a3);
302: printf("%d %d %d %d\n",cnt,a1,a2,a3);
303: if (a1 >= 0) {ja[net++] = a1;}
304: if (a2 >= 0) {ja[net++] = a2;}
305: if (a3 >= 0) {ja[net++] = a3;}
306: ia[i+1] = net;
307: }
309: printf("ia values for processor 0\n");
310: for (i=0; i<mlocal_ele+1; i++) {
311: printf("%d ",ia[i]);
312: }
313: printf("\n");
314: printf("ja values for processor 0\n");
315: for (i=0; i<ia[mlocal_ele]; i++) {
316: printf("%d ",ja[i]);
317: }
318: printf("\n");
320: /*
321: Read in element neighbor information for all other processors
322: */
323: PetscMalloc((mlocal_ele+1)*sizeof(PetscInt),&iatmp);
324: PetscMalloc((3*mlocal_ele+1)*sizeof(PetscInt),&jatmp);
325: for (j=1; j<size; j++) {
326: net = 0;
327: iatmp[0] = 0;
328: printf("Element neighbors on processor %d\n",j);
329: for (i=0; i<mmlocal_ele[j]; i++) {
330: fscanf(fd,"%d %d %d %d\n",&cnt,&a1,&a2,&a3);
331: printf("%d %d %d %d\n",cnt,a1,a2,a3);
332: if (a1 >= 0) {jatmp[net++] = a1;}
333: if (a2 >= 0) {jatmp[net++] = a2;}
334: if (a3 >= 0) {jatmp[net++] = a3;}
335: iatmp[i+1] = net;
336: }
338: printf("ia values for processor %d\n",j);
339: for (i=0; i<mmlocal_ele[j]+1; i++) {
340: printf("%d ",iatmp[i]);
341: }
342: printf("\n");
343: printf("ja values for processor %d\n",j);
344: for (i=0; i<iatmp[mmlocal_ele[j]]; i++) {
345: printf("%d ",jatmp[i]);
346: }
347: printf("\n");
349: /* send graph off to appropriate processor */
350: MPI_Send(iatmp,mmlocal_ele[j]+1,MPI_INT,j,0,PETSC_COMM_WORLD);
351: MPI_Send(jatmp,iatmp[mmlocal_ele[j]],MPI_INT,j,0,PETSC_COMM_WORLD);
352: }
353: PetscFree(iatmp);
354: PetscFree(jatmp);
355: PetscFree(mmlocal_ele);
357: fclose(fd);
358: } else {
359: /*
360: We are not the zeroth processor, so we do not open the file;
361: rather, we wait for processor 0 to send us our data.
362: */
364: /* receive total number of vertices */
365: MPI_Bcast(&n_vert,1,MPI_INT,0,PETSC_COMM_WORLD);
366: mlocal_vert = n_vert/size + ((n_vert % size) > rank);
368: /* receive vertices */
369: PetscMalloc(2*(mlocal_vert+1)*sizeof(PetscScalar),&vert);
370: MPI_Recv(vert,2*mlocal_vert,MPIU_SCALAR,0,0,PETSC_COMM_WORLD,&status);
372: /* receive total number of elements */
373: MPI_Bcast(&n_ele,1,MPI_INT,0,PETSC_COMM_WORLD);
374: mlocal_ele = n_ele/size + ((n_ele % size) > rank);
376: /* receive elements */
377: PetscMalloc(3*(mlocal_ele+1)*sizeof(PetscInt),&ele);
378: MPI_Recv(ele,3*mlocal_ele,MPI_INT,0,0,PETSC_COMM_WORLD,&status);
380: /* receive element adjacency graph */
381: PetscMalloc((mlocal_ele+1)*sizeof(PetscInt),&ia);
382: MPI_Recv(ia,mlocal_ele+1,MPI_INT,0,0,PETSC_COMM_WORLD,&status);
384: PetscMalloc((ia[mlocal_ele]+1)*sizeof(PetscInt),&ja);
385: MPI_Recv(ja,ia[mlocal_ele],MPI_INT,0,0,PETSC_COMM_WORLD,&status);
386: }
388: gdata->n_vert = n_vert;
389: gdata->n_ele = n_ele;
390: gdata->mlocal_vert = mlocal_vert;
391: gdata->mlocal_ele = mlocal_ele;
392: gdata->ele = ele;
393: gdata->vert = vert;
395: gdata->ia = ia;
396: gdata->ja = ja;
398: return(0);
399: }
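
/*
   The following routine is an illustrative sketch only; it is NOT called by
   this example. It shows how the vertex portion of DataRead() could be made
   scalable with MPI-IO, as suggested in the comments above. The routine name
   DataReadVerticesBinary(), the file name "usgdata.bin", and the assumed file
   layout (the number of vertices stored as a single int followed by the x,y
   coordinates as doubles) are all hypothetical; it also assumes PetscScalar
   is double (as the fscanf() calls above already do) and that the MPI
   implementation provides MPI-IO.
*/
PetscErrorCode DataReadVerticesBinary(GridData *gdata)
{
  MPI_File    fh;
  MPI_Offset  disp;
  PetscMPIInt rank,size;
  int         nv;
  PetscInt    n_vert,mlocal_vert,rstart;
  PetscScalar *vert;

  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

  /* all processes open the file collectively; no single reader/sender is needed */
  MPI_File_open(PETSC_COMM_WORLD,"usgdata.bin",MPI_MODE_RDONLY,MPI_INFO_NULL,&fh);

  /* every process reads the vertex count stored at the beginning of the file */
  MPI_File_read_at_all(fh,0,&nv,1,MPI_INT,MPI_STATUS_IGNORE);
  n_vert = nv;

  /* same block distribution as in DataRead(): the first n_vert % size ranks get one extra vertex */
  mlocal_vert = n_vert/size + ((n_vert % size) > rank);
  rstart      = rank*(n_vert/size) + PetscMin(rank,n_vert % size);

  /* each process reads only the x,y pairs of the vertices it is assigned */
  PetscMalloc(2*(mlocal_vert+1)*sizeof(PetscScalar),&vert);
  disp = (MPI_Offset)(sizeof(int) + 2*rstart*sizeof(double));
  MPI_File_read_at_all(fh,disp,vert,2*mlocal_vert,MPI_DOUBLE,MPI_STATUS_IGNORE);
  MPI_File_close(&fh);

  gdata->n_vert      = n_vert;
  gdata->mlocal_vert = mlocal_vert;
  gdata->vert        = vert;
  return(0);
}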
404: /*
405: Given the grid data spread across the processors, determines a
406: new partitioning of the CELLS (elements) to reduce the number of cut edges between
407: cells (elements).
408: */
409: PetscErrorCode DataPartitionElements(GridData *gdata)
410: {
411: Mat Adj; /* adjacency matrix */
412: PetscInt *ia,*ja;
413: PetscInt mlocal_ele,n_ele;
414: PetscErrorCode ierr;
415: MatPartitioning part;
416: IS isnewproc;
419: n_ele = gdata->n_ele;
420: mlocal_ele = gdata->mlocal_ele;
422: ia = gdata->ia;
423: ja = gdata->ja;
425: /*
426: Create the adjacency graph matrix
427: */
428: MatCreateMPIAdj(PETSC_COMM_WORLD,mlocal_ele,n_ele,ia,ja,PETSC_NULL,&Adj);
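/*
   Note: MatCreateMPIAdj() does not copy ia and ja; the matrix takes ownership
   of them and frees them when it is destroyed, which is why they are not
   freed explicitly in this example.
*/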
430: /*
431: Create the partitioning object
432: */
433: MatPartitioningCreate(PETSC_COMM_WORLD,&part);
434: MatPartitioningSetAdjacency(part,Adj);
435: MatPartitioningSetFromOptions(part);
436: MatPartitioningApply(part,&isnewproc);
437: MatPartitioningDestroy(part);
439: /*
440: isnewproc - indicates for each local element the new processor it is assigned to
441: */
442: PetscPrintf(PETSC_COMM_WORLD,"New processor assignment for each element\n");
443: ISView(isnewproc,PETSC_VIEWER_STDOUT_WORLD);
444: gdata->isnewproc = isnewproc;
446: /*
447: Free the adjacency graph data structures
448: */
449: MatDestroy(Adj);
452: return(0);
453: }
457: /*
458: Moves the grid element data to be on the correct processor for the new
459: element partitioning.
460: */
461: PetscErrorCode DataMoveElements(GridData *gdata)
462: {
464: PetscInt *counts,i,*idx;
465: PetscMPIInt rank,size;
466: Vec vele,veleold;
467: PetscScalar *array;
468: IS isscat,isnum;
469: VecScatter vecscat;
473: MPI_Comm_size(PETSC_COMM_WORLD,&size);
474: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
476: /*
477: Determine how many elements are assigned to each processor
478: */
479: PetscMalloc(size*sizeof(PetscInt),&counts);
480: ISPartitioningCount(gdata->isnewproc,counts);
482: /*
483: Create a vector to contain the newly ordered element information
485: Note: we use vectors to communicate this data since we can use the powerful
486: VecScatter routines to get the data to the correct location. This is a little
487: wasteful since the vectors hold double precision numbers instead of integers,
488: but since this is just a setup phase of the numerical computation that
489: is executed only once, it is not a measurable performance bottleneck.
490: */
491: VecCreate(PETSC_COMM_WORLD,&vele);
492: VecSetSizes(vele,3*counts[rank],PETSC_DECIDE);
493: VecSetFromOptions(vele);
495: /*
496: Create an index set from the isnewproc index set to indicate the mapping TO
497: */
498: ISPartitioningToNumbering(gdata->isnewproc,&isnum);
499: ISDestroy(gdata->isnewproc);
500: /*
501: There are three data items per cell (element): the integer numbers of its three
502: vertices (we convert them to double to use the scatter). One can think
503: of the vectors as having a block size of 3; then there is one index in idx[] for each element.
504: */
505: ISGetIndices(isnum,&idx);
506: for (i=0; i<gdata->mlocal_ele; i++) {
507: idx[i] *= 3;
508: }
509: ISCreateBlock(PETSC_COMM_WORLD,3,gdata->mlocal_ele,idx,&isscat);
510: ISRestoreIndices(isnum,&idx);
511: ISDestroy(isnum);
513: /*
514: Create a vector to contain the original vertex information for each element
515: */
516: VecCreateSeq(PETSC_COMM_SELF,3*gdata->mlocal_ele,&veleold);
517: VecGetArray(veleold,&array);
518: for (i=0; i<3*gdata->mlocal_ele; i++) {
519: array[i] = gdata->ele[i];
520: }
521: VecRestoreArray(veleold,&array);
522:
523: /*
524: Scatter the element vertex information (still in the original vertex ordering) to the correct processor
525: */
526: VecScatterCreate(veleold,PETSC_NULL,vele,isscat,&vecscat);
527: ISDestroy(isscat);
528: VecScatterBegin(veleold,vele,INSERT_VALUES,SCATTER_FORWARD,vecscat);
529: VecScatterEnd(veleold,vele,INSERT_VALUES,SCATTER_FORWARD,vecscat);
530: VecScatterDestroy(vecscat);
531: VecDestroy(veleold);
533: /*
534: Put the element vertex data into a new allocation of the gdata->ele
535: */
536: PetscFree(gdata->ele);
537: gdata->mlocal_ele = counts[rank];
538: PetscFree(counts);
539: PetscMalloc(3*gdata->mlocal_ele*sizeof(PetscInt),&gdata->ele);
540: VecGetArray(vele,&array);
541: for (i=0; i<3*gdata->mlocal_ele; i++) {
542: gdata->ele[i] = (int)PetscRealPart(array[i]);
543: }
544: VecRestoreArray(vele,&array);
545: VecDestroy(vele);
547: PetscPrintf(PETSC_COMM_WORLD,"Old vertex numbering in new element ordering\n");
548: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Processor %d\n",rank);
549: for (i=0; i<gdata->mlocal_ele; i++) {
550: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%d %d %d %d\n",i,gdata->ele[3*i],gdata->ele[3*i+1],
551: gdata->ele[3*i+2]);
552: }
553: PetscSynchronizedFlush(PETSC_COMM_WORLD);
555: return(0);
556: }
560: /*
561: Given the newly partitioned cells (elements), this routine partitions the
562: vertices.
564: The code is not completely scalable since
565: 1) it requires O(n_vert) bits of memory per processor
566: 2) it uses O(size) stages of communication, each of size O(n_vert) bits
567: 3) it is sequential (each processor marks its vertices ONLY after all processors
568: to the left have marked theirs)
569: 4) the amount of work on the last processor is O(n_vert)
571: The algorithm also does not take advantage of vertices that are "interior" to a
572: processor's elements (that is, not contained in any element on another processor).
573: A better algorithm would first have all processors determine "interior" vertices and
574: make sure they are retained on that processor before listing "boundary" vertices.
576: The algorithm is:
577: a) each processor waits for a message from the left containing the mask of all vertices marked so far
578: b) it loops over all local elements, generating a list of vertices it will
579: claim (not claiming ones that have already been marked in the bit array);
580: it claims at most n_vert/size vertices
581: c) it sends the updated mask to the processor on its right
583: This is a quick-and-dirty implementation; it should work fine for many problems,
584: but will need to be replaced once profiling shows that it takes a large amount of
585: time. An advantage is that it requires no searching or sorting.
586:
587: */
588: PetscErrorCode DataPartitionVertices(GridData *gdata)
589: {
590: PetscInt n_vert = gdata->n_vert,*localvert;
591: PetscInt mlocal_ele = gdata->mlocal_ele,*ele = gdata->ele,i,j,nlocal = 0,nmax;
592: PetscMPIInt rank,size;
594: PetscBT mask;
595: MPI_Status status;
598: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
599: MPI_Comm_size(PETSC_COMM_WORLD,&size);
601: /*
602: Allocate space to store the bit array indicating which vertices have been marked
603: */
604: PetscBTCreate(n_vert,mask);
606: /*
607: All processors except the last can have a maximum of n_vert/size vertices assigned
608: (because last processor needs to handle any leftovers)
609: */
610: nmax = n_vert/size;
611: if (rank == size-1) {
612: nmax = n_vert;
613: }
615: /*
616: Receive list of marked vertices from left
617: */
618: if (rank) {
619: MPI_Recv(mask,PetscBTLength(n_vert),MPI_CHAR,rank-1,0,PETSC_COMM_WORLD,&status);
620: }
622: if (rank == size-1) {
623: /* last processor gets all the rest */
624: for (i=0; i<n_vert; i++) {
625: if (!PetscBTLookup(mask,i)) {
626: nlocal++;
627: }
628: }
629: nmax = nlocal;
630: }
632: /*
633: Now that we know an upper bound on how many vertices are local, allocate enough space for them and mark them
634: */
635: PetscMalloc((nmax+1)*sizeof(PetscInt),&localvert);
637: /* generate local list and fill in mask */
638: nlocal = 0;
639: if (rank < size-1) {
640: /* count my vertices */
641: for (i=0; i<mlocal_ele; i++) {
642: for (j=0; j<3; j++) {
643: if (!PetscBTLookupSet(mask,ele[3*i+j])) {
644: localvert[nlocal++] = ele[3*i+j];
645: if (nlocal >= nmax) goto foundenough2;
646: }
647: }
648: }
649: foundenough2:;
650: } else {
651: /* last processor gets all the rest */
652: for (i=0; i<n_vert; i++) {
653: if (!PetscBTLookup(mask,i)) {
654: localvert[nlocal++] = i;
655: }
656: }
657: }
658: /*
659: Send bit mask on to next processor
660: */
661: if (rank < size-1) {
662: MPI_Send(mask,PetscBTLength(n_vert),MPI_CHAR,rank+1,0,PETSC_COMM_WORLD);
663: }
664: PetscBTDestroy(mask);
666: gdata->localvert = localvert;
667: gdata->nlocal = nlocal;
669: /* print lists of owned vertices */
670: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] Number vertices assigned %d\n",rank,nlocal);
671: PetscSynchronizedFlush(PETSC_COMM_WORLD);
672: PetscIntView(nlocal,localvert,PETSC_VIEWER_STDOUT_WORLD);
674: return(0);
675: }
679: /*
680: Given the partitioning of the vertices, renumbers the element vertex lists to the
681: new vertex numbering and moves the vertex coordinate values to the correct processor
682: */
683: PetscErrorCode DataMoveVertices(GridData *gdata)
684: {
685: AO ao;
687: PetscMPIInt rank;
688: PetscInt i;
689: Vec vert,overt;
690: VecScatter vecscat;
691: IS isscat;
692: PetscScalar *avert;
695: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
697: /* ---------------------------------------------------------------------
698: Create a global reordering of the vertex numbers
699: */
700: AOCreateBasic(PETSC_COMM_WORLD,gdata->nlocal,gdata->localvert,PETSC_NULL,&ao);
702: /*
703: Change the element vertex information to the new vertex numbering
704: */
705: AOApplicationToPetsc(ao,3*gdata->mlocal_ele,gdata->ele);
706: PetscPrintf(PETSC_COMM_WORLD,"New vertex numbering in new element ordering\n");
707: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Processor %d\n",rank);
708: for (i=0; i<gdata->mlocal_ele; i++) {
709: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%d %d %d %d\n",i,gdata->ele[3*i],gdata->ele[3*i+1],
710: gdata->ele[3*i+2]);
711: }
712: PetscSynchronizedFlush(PETSC_COMM_WORLD);
714: /*
715: Destroy the AO that is no longer needed
716: */
717: AODestroy(ao);
719: /* --------------------------------------------------------------------
720: Finally, ship the vertex coordinate information to its owning process;
721: note that we do this in a way very similar to what was done for the element information
722: */
723: /* create a vector to contain the newly ordered vertex information */
724: VecCreateSeq(PETSC_COMM_SELF,2*gdata->nlocal,&vert);
726: /* create a vector to contain the old ordered vertex information */
727: VecCreateMPIWithArray(PETSC_COMM_WORLD,2*gdata->mlocal_vert,PETSC_DECIDE,gdata->vert,
728: &overt);
730: /*
731: There are two data items per vertex, the x and y coordinates (i.e. one can think
732: of the vectors as having a block size of 2; then there is one index in localvert[] for each block)
733: */
734: for (i=0; i<gdata->nlocal; i++) gdata->localvert[i] *= 2;
735: ISCreateBlock(PETSC_COMM_WORLD,2,gdata->nlocal,gdata->localvert,&isscat);
736: PetscFree(gdata->localvert);
738: /*
739: Scatter the vertex coordinate information to the correct processor
740: */
741: VecScatterCreate(overt,isscat,vert,PETSC_NULL,&vecscat);
742: ISDestroy(isscat);
743: VecScatterBegin(overt,vert,INSERT_VALUES,SCATTER_FORWARD,vecscat);
744: VecScatterEnd(overt,vert,INSERT_VALUES,SCATTER_FORWARD,vecscat);
745: VecScatterDestroy(vecscat);
747: VecDestroy(overt);
748: PetscFree(gdata->vert);
749:
750: /*
751: Put resulting vertex information into gdata->vert array
752: */
753: PetscMalloc(2*gdata->nlocal*sizeof(PetscScalar),&gdata->vert);
754: VecGetArray(vert,&avert);
755: PetscMemcpy(gdata->vert,avert,2*gdata->nlocal*sizeof(PetscScalar));
756: VecRestoreArray(vert,&avert);
757: gdata->mlocal_vert = gdata->nlocal;
758: VecDestroy(vert);
760: PetscPrintf(PETSC_COMM_WORLD,"Vertex coordinates in new numbering\n");
761: for (i=0; i<gdata->mlocal_vert; i++) {
762: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"(%g,%g)\n",gdata->vert[2*i],gdata->vert[2*i+1]);
763: }
764: PetscSynchronizedFlush(PETSC_COMM_WORLD);
766: return(0);
767: }
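
/*
   The following routine is an illustrative sketch only; it is NOT called by
   this example. As mentioned in the introductory comments, to compute local
   element stiffness matrices each process needs the x,y coordinates of ALL
   vertices of its "owned" elements, including vertices owned by other
   processes. The sketch gathers them with a VecScatter, assuming the new
   element and vertex numbering produced by the routines above; the routine
   name GetElementCoordinates() and the output layout (x,y pairs listed
   element by element, with shared vertices repeated) are hypothetical.
*/
PetscErrorCode GetElementCoordinates(GridData *gdata,PetscScalar **coords)
{
  Vec         overt,lvert;
  VecScatter  vecscat;
  IS          isvert;
  PetscInt    i,nindices = 3*gdata->mlocal_ele,*indices;
  PetscScalar *avert;

  /* parallel vector whose local part is the x,y coordinates of the owned vertices */
  VecCreateMPIWithArray(PETSC_COMM_WORLD,2*gdata->mlocal_vert,PETSC_DECIDE,gdata->vert,&overt);

  /* one block index (block size 2, for x and y) per element vertex, duplicates allowed */
  PetscMalloc((nindices+1)*sizeof(PetscInt),&indices);
  for (i=0; i<nindices; i++) indices[i] = 2*gdata->ele[i];
  ISCreateBlock(PETSC_COMM_WORLD,2,nindices,indices,&isvert);
  PetscFree(indices);

  /* sequential vector that receives the coordinates in local element order */
  VecCreateSeq(PETSC_COMM_SELF,2*nindices,&lvert);
  VecScatterCreate(overt,isvert,lvert,PETSC_NULL,&vecscat);
  ISDestroy(isvert);
  VecScatterBegin(overt,lvert,INSERT_VALUES,SCATTER_FORWARD,vecscat);
  VecScatterEnd(overt,lvert,INSERT_VALUES,SCATTER_FORWARD,vecscat);
  VecScatterDestroy(vecscat);
  VecDestroy(overt);

  /* copy the gathered coordinates into an array owned by the caller */
  PetscMalloc(2*nindices*sizeof(PetscScalar),coords);
  VecGetArray(lvert,&avert);
  PetscMemcpy(*coords,avert,2*nindices*sizeof(PetscScalar));
  VecRestoreArray(lvert,&avert);
  VecDestroy(lvert);
  return(0);
}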
772: PetscErrorCode DataDestroy(GridData *gdata)
773: {
777: PetscFree(gdata->ele);
778: PetscFree(gdata->vert);
779: return(0);
780: }