Actual source code: csrperm.c
1: #define PETSCMAT_DLL
3: /*
4: Defines basic operations for the MATSEQCSRPERM matrix class.
5: This class is derived from the MATSEQAIJ class and retains the
6: compressed row storage (aka Yale sparse matrix format) but augments
7: it with some permutation information that enables some operations
8: to be more vectorizable. A physically rearranged copy of the matrix
9: may be stored if the user desires.
11: Eventually a variety of permutations may be supported.
12: */
14: #include "src/mat/impls/aij/seq/aij.h"
16: #define NDIM 512
17: /* NDIM specifies how many rows at a time we should work with when
18: * performing the vectorized mat-vec. This depends on various factors
19: * such as vector register length, etc., and I really need to add a
20: * way for the user (or the library) to tune this. I'm setting it to
21: * 512 for now since that is what Ed D'Azevedo was using in his Fortran
22: * routines. */
24: typedef struct {
25: PetscInt ngroup;
26: PetscInt *xgroup;
27: /* Denotes where groups of rows with same number of nonzeros
28: * begin and end, i.e., xgroup[i] gives us the position in iperm[]
29: * where the ith group begins. */
30: PetscInt *nzgroup; /* how many nonzeros each row that is a member of group i has. */
31: PetscInt *iperm; /* The permutation vector. */
33: /* Flag that indicates whether we need to clean up permutation
34: * information during the MatDestroy. */
35: PetscTruth CleanUpCSRPERM;
37: /* Some of this stuff is for Ed's recursive triangular solve.
38: * I'm not sure what I need yet. */
39: PetscInt blocksize;
40: PetscInt nstep;
41: PetscInt *jstart_list;
42: PetscInt *jend_list;
43: PetscInt *action_list;
44: PetscInt *ngroup_list;
45: PetscInt **ipointer_list;
46: PetscInt **xgroup_list;
47: PetscInt **nzgroup_list;
48: PetscInt **iperm_list;
50: /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we
51: * actually want to call this function from within the
52: * MatAssemblyEnd_SeqCSRPERM function. Similarly, we also need
53: * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */
54: PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType);
55: PetscErrorCode (*MatDestroy_SeqAIJ)(Mat);
56: PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*);
57: } Mat_SeqCSRPERM;
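/* A small worked example of the grouping data above (row counts assumed
 * purely for illustration): consider a 5-row matrix whose rows hold
 * 2, 1, 2, 3, and 2 nonzeros. Bucketing the rows by nonzero count gives
 * three groups, so SeqCSRPERM_create_perm() would produce
 *
 *   ngroup    = 3
 *   nzgroup[] = {1, 2, 3}        nonzeros per row in each group
 *   xgroup[]  = {0, 1, 4, 5}     where each group begins in iperm[]
 *   iperm[]   = {1, 0, 2, 4, 3}  original row numbers, sorted by nonzero count
 *
 * The rows with 2 nonzeros occupy positions xgroup[1] through xgroup[2]-1
 * of iperm[], i.e. iperm[1..3] = {0, 2, 4}. */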
62: PetscErrorCode MatConvert_SeqCSRPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
63: {
64: /* This routine is only called to convert a MATCSRPERM to its base PETSc type, */
65: /* so we will ignore 'MatType type'. */
67: Mat B = *newmat;
68: Mat_SeqCSRPERM *csrperm=(Mat_SeqCSRPERM*)A->spptr;
71: if (reuse == MAT_INITIAL_MATRIX) {
72: MatDuplicate(A,MAT_COPY_VALUES,&B);
73: }
75: /* Reset the original function pointers. */
76: B->ops->assemblyend = csrperm->AssemblyEnd_SeqAIJ;
77: B->ops->destroy = csrperm->MatDestroy_SeqAIJ;
78: B->ops->duplicate = csrperm->MatDuplicate_SeqAIJ;
80: /* Free everything in the Mat_SeqCSRPERM data structure.
81: * We don't free the Mat_SeqCSRPERM struct itself, as this will
82: * cause problems later when MatDestroy() tries to free it. */
83: if(csrperm->CleanUpCSRPERM) {
84: PetscFree(csrperm->xgroup);
85: PetscFree(csrperm->nzgroup);
86: PetscFree(csrperm->iperm);
87: }
89: /* Change the type of B to MATSEQAIJ. */
90: PetscObjectChangeTypeName( (PetscObject)B, MATSEQAIJ);
91:
92: *newmat = B;
93: return(0);
94: }
99: PetscErrorCode MatDestroy_SeqCSRPERM(Mat A)
100: {
102: Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
104: /* We are going to convert A back into a SEQAIJ matrix, since we are
105: * eventually going to use MatDestroy_SeqAIJ() to destroy everything
106: * that is not specific to CSRPERM.
107: * In preparation for this, reset the operations pointers in A to
108: * their SeqAIJ versions. */
109: A->ops->assemblyend = csrperm->AssemblyEnd_SeqAIJ;
110: /* I suppose I don't actually need to point A->ops->assemblyend back
111: * to the right thing, but I do it anyway for completeness. */
112: A->ops->destroy = csrperm->MatDestroy_SeqAIJ;
113: A->ops->duplicate = csrperm->MatDuplicate_SeqAIJ;
115: /* Free everything in the Mat_SeqCSRPERM data structure.
116: * Note that we don't need to free the Mat_SeqCSRPERM struct
117: * itself, as MatDestroy() will do so. */
118: if(csrperm->CleanUpCSRPERM) {
119: PetscFree(csrperm->xgroup);
120: PetscFree(csrperm->nzgroup);
121: PetscFree(csrperm->iperm);
122: }
124: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
125: * to destroy everything that remains. */
126: PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);
127: /* Note that I don't call MatSetType(). I believe MatSetType() is only
128: * meant to be called when *building* a matrix. I could be wrong, but
129: * that is how things work for the SuperLU matrix class. */
130: (*A->ops->destroy)(A);
131: return(0);
132: }
134: PetscErrorCode MatDuplicate_SeqCSRPERM(Mat A, MatDuplicateOption op, Mat *M)
135: {
137: Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
138: Mat_SeqCSRPERM *csrperm_dest = (Mat_SeqCSRPERM *) (*M)->spptr;
141: (*csrperm->MatDuplicate_SeqAIJ)(A,op,M);
142: PetscMemcpy((*M)->spptr,csrperm,sizeof(Mat_SeqCSRPERM));
143: /* Allocate space for, and copy the grouping and permutation info.
144: * I note that when the groups are initially determined in
145: * SeqCSRPERM_create_perm, xgroup and nzgroup may be sized larger than
146: * necessary. But at this point, we know how large they need to be, and
147: * allocate only the necessary amount of memory. So the duplicated matrix
148: * may actually use slightly less storage than the original! */
149: PetscMalloc(A->rmap.n*sizeof(PetscInt), &csrperm_dest->iperm);
150: PetscMalloc((csrperm->ngroup+1)*sizeof(PetscInt), &csrperm_dest->xgroup);
151: PetscMalloc((csrperm->ngroup)*sizeof(PetscInt), &csrperm_dest->nzgroup);
152: PetscMemcpy(csrperm_dest->iperm,csrperm->iperm,sizeof(PetscInt)*A->rmap.n);
153: PetscMemcpy(csrperm_dest->xgroup,csrperm->xgroup,sizeof(PetscInt)*(csrperm->ngroup+1));
154: PetscMemcpy(csrperm_dest->nzgroup,csrperm->nzgroup,sizeof(PetscInt)*csrperm->ngroup);
155: return(0);
156: }
160: PetscErrorCode SeqCSRPERM_create_perm(Mat A)
161: {
162: PetscInt m; /* Number of rows in the matrix. */
163: Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
164: PetscInt *ia; /* From the CSR representation; points to the beginning of each row. */
165: PetscInt maxnz; /* Maximum number of nonzeros in any row. */
166: PetscInt *rows_in_bucket;
167: /* To construct the permutation, we sort each row into one of the
168: * (maxnz + 1) buckets based on how many nonzeros are in the row. */
169: PetscInt nz;
170: PetscInt *nz_in_row; /* the number of nonzero elements in row k. */
171: PetscInt *ipnz;
172: /* When constructing the iperm permutation vector,
173: * ipnz[nz] is used to point to the next place in the permutation vector
174: * that a row with nz nonzero elements should be placed.*/
175: Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
176: /* Points to the MATSEQCSRPERM-specific data in the matrix A. */
178: PetscInt i, ngroup, istart, ipos;
180: /* I really ought to put something in here to check if A is of
181: * type MATSEQCSRPERM and return an error code if it is not.
182: * Come back and do this! */
183:
184: m = A->rmap.n;
185: ia = a->i;
186:
187: /* Allocate the arrays that will hold the permutation vector. */
188: PetscMalloc(m*sizeof(PetscInt), &csrperm->iperm);
190: /* Allocate some temporary work arrays that will be used in
191: * calculating the permutation vector and groupings. */
192: PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
193: PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
194: PetscMalloc(m*sizeof(PetscInt), &nz_in_row);
196: /* Now actually figure out the permutation and grouping. */
198: /* First pass: Determine number of nonzeros in each row, maximum
199: * number of nonzeros in any row, and how many rows fall into each
200: * "bucket" of rows with same number of nonzeros. */
201: maxnz = 0;
202: for (i=0; i<m; i++) {
203: nz_in_row[i] = ia[i+1]-ia[i];
204: if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
205: }
207: for (i=0; i<=maxnz; i++) {
208: rows_in_bucket[i] = 0;
209: }
210: for (i=0; i<m; i++) {
211: nz = nz_in_row[i];
212: rows_in_bucket[nz]++;
213: }
215: /* Allocate space for the grouping info. There will be at most (maxnz + 1)
216: * groups. (It is maxnz + 1 instead of simply maxnz because there may be
217: * rows with no nonzero elements.) If there are (maxnz + 1) groups,
218: * then xgroup[] must consist of (maxnz + 2) elements, since the last
219: * element of xgroup will tell us where the (maxnz + 1)th group ends.
220: * We allocate space for the maximum number of groups;
221: * that is potentially a little wasteful, but not too much so.
222: * Perhaps I should fix it later. */
223: PetscMalloc((maxnz+2)*sizeof(PetscInt), &csrperm->xgroup);
224: PetscMalloc((maxnz+1)*sizeof(PetscInt), &csrperm->nzgroup);
226: /* Second pass. Look at what is in the buckets and create the groupings.
227: * Note that it is OK to have a group of rows with no non-zero values. */
228: ngroup = 0;
229: istart = 0;
230: for (i=0; i<=maxnz; i++) {
231: if (rows_in_bucket[i] > 0) {
232: csrperm->nzgroup[ngroup] = i;
233: csrperm->xgroup[ngroup] = istart;
234: ngroup++;
235: istart += rows_in_bucket[i];
236: }
237: }
239: csrperm->xgroup[ngroup] = istart;
240: csrperm->ngroup = ngroup;
242: /* Now fill in the permutation vector iperm. */
243: ipnz[0] = 0;
244: for (i=0; i<maxnz; i++) {
245: ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
246: }
248: for (i=0; i<m; i++) {
249: nz = nz_in_row[i];
250: ipos = ipnz[nz];
251: csrperm->iperm[ipos] = i;
252: ipnz[nz]++;
253: }
255: /* Clean up temporary work arrays. */
256: PetscFree(rows_in_bucket);
257: PetscFree(ipnz);
258: PetscFree(nz_in_row);
260: /* Since we've allocated some memory to hold permutation info,
261: * flip the CleanUpCSRPERM flag to true. */
262: csrperm->CleanUpCSRPERM = PETSC_TRUE;
263: return(0);
264: }
269: PetscErrorCode MatAssemblyEnd_SeqCSRPERM(Mat A, MatAssemblyType mode)
270: {
272: Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
273: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
275: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
276:
277: /* Since a MATSEQCSRPERM matrix is really just a MATSEQAIJ with some
278: * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
279: * I'm not sure if this is the best way to do this, but it avoids
280: * a lot of code duplication.
281: * I also note that currently MATSEQCSRPERM doesn't know anything about
282: * the Mat_CompressedRow data structure that SeqAIJ now uses when there
283: * are many zero rows. If the SeqAIJ assembly end routine decides to use
284: * this, this may break things. (Don't know... haven't looked at it.) */
285: a->inode.use = PETSC_FALSE;
286: (*csrperm->AssemblyEnd_SeqAIJ)(A, mode);
288: /* Now calculate the permutation and grouping information. */
289: SeqCSRPERM_create_perm(A);
290: return(0);
291: }
293: #include "src/inline/dot.h"
297: PetscErrorCode MatMult_SeqCSRPERM(Mat A,Vec xx,Vec yy)
298: {
299: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
300: PetscScalar *x,*y,*aa;
302: PetscInt *aj,*ai;
303: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking))
304: PetscInt i,j;
305: #endif
307: /* Variables that don't appear in MatMult_SeqAIJ. */
308: Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
309: PetscInt *iperm; /* Points to the permutation vector. */
310: PetscInt *xgroup;
311: /* Denotes where groups of rows with same number of nonzeros
312: * begin and end in iperm. */
313: PetscInt *nzgroup;
314: PetscInt ngroup;
315: PetscInt igroup;
316: PetscInt jstart,jend;
317: /* jstart is used in loops to denote the position in iperm where a
318: * group starts; jend denotes the position where it ends.
319: * (jend + 1 is where the next group starts.) */
320: PetscInt iold,nz;
321: PetscInt istart,iend,isize;
322: PetscInt ipos;
323: PetscScalar yp[NDIM];
324: PetscInt ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */
326: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
327: #pragma disjoint(*x,*y,*aa)
328: #endif
331: VecGetArray(xx,&x);
332: VecGetArray(yy,&y);
333: aj = a->j; /* aj[k] gives column index for element aa[k]. */
334: aa = a->a; /* Nonzero elements stored row-by-row. */
335: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
337: /* Get the info we need about the permutations and groupings. */
338: iperm = csrperm->iperm;
339: ngroup = csrperm->ngroup;
340: xgroup = csrperm->xgroup;
341: nzgroup = csrperm->nzgroup;
342:
343: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking)
344: fortranmultcsrperm_(&m,x,ii,aj,aa,y);
345: #else
347: for (igroup=0; igroup<ngroup; igroup++) {
348: jstart = xgroup[igroup];
349: jend = xgroup[igroup+1] - 1;
350: nz = nzgroup[igroup];
352: /* Handle the special cases where the number of nonzeros per row
353: * in the group is either 0 or 1. */
354: if (nz == 0) {
355: for (i=jstart; i<=jend; i++) {
356: y[iperm[i]] = 0.0;
357: }
358: } else if (nz == 1) {
359: for (i=jstart; i<=jend; i++) {
360: iold = iperm[i];
361: ipos = ai[iold];
362: y[iold] = aa[ipos] * x[aj[ipos]];
363: }
364: } else {
365:
366: /* We work our way through the current group in chunks of NDIM rows
367: * at a time. */
369: for (istart=jstart; istart<=jend; istart+=NDIM) {
370: /* Figure out where the chunk of 'isize' rows ends in iperm.
371: * 'isize' may of course be less than NDIM for the last chunk. */
372: iend = istart + (NDIM - 1);
373: if (iend > jend) { iend = jend; }
374: isize = iend - istart + 1;
376: /* Initialize the yp[] array that will be used to hold part of
377: * the permuted results vector, and figure out where in aa each
378: * row of the chunk will begin. */
379: for (i=0; i<isize; i++) {
380: iold = iperm[istart + i];
381: /* iold is a row number from the matrix A *before* reordering. */
382: ip[i] = ai[iold];
383: /* ip[i] tells us where the ith row of the chunk begins in aa. */
384: yp[i] = (PetscScalar) 0.0;
385: }
387: /* If the number of nonzeros per row exceeds the number of rows in
388: * the chunk, we should vectorize along nz, that is, perform the
389: * mat-vec one row at a time as in the usual CSR case. */
390: if (nz > isize) {
391: #if defined(PETSC_HAVE_CRAYC)
392: #pragma _CRI preferstream
393: #endif
394: for (i=0; i<isize; i++) {
395: #if defined(PETSC_HAVE_CRAYC)
396: #pragma _CRI prefervector
397: #endif
398: for (j=0; j<nz; j++) {
399: ipos = ip[i] + j;
400: yp[i] += aa[ipos] * x[aj[ipos]];
401: }
402: }
403: } else {
404: /* Otherwise, there are enough rows in the chunk to make it
405: * worthwhile to vectorize across the rows, that is, to do the
406: * matvec by operating with "columns" of the chunk. */
407: for (j=0; j<nz; j++) {
408: for(i=0; i<isize; i++) {
409: ipos = ip[i] + j;
410: yp[i] += aa[ipos] * x[aj[ipos]];
411: }
412: }
413: }
415: #if defined(PETSC_HAVE_CRAYC)
416: #pragma _CRI ivdep
417: #endif
418: /* Put results from yp[] into non-permuted result vector y. */
419: for (i=0; i<isize; i++) {
420: y[iperm[istart+i]] = yp[i];
421: }
422: } /* End processing chunk of isize rows of a group. */
423: } /* End handling matvec for chunk with nz > 1. */
424: } /* End loop over igroup. */
425: #endif
426: PetscLogFlops(2*a->nz - A->rmap.n);
427: VecRestoreArray(xx,&x);
428: VecRestoreArray(yy,&y);
429: return(0);
430: }
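/* A note on the loop ordering above: with the j-outer/i-inner ("column")
 * variant, a fixed j touches aa[ip[0]+j], aa[ip[1]+j], ..., aa[ip[isize-1]+j],
 * i.e. the jth nonzero of every row in the chunk. Because every row in the
 * group has exactly nz nonzeros, those isize updates of yp[] are independent
 * and can be vectorized across the chunk, much like a mat-vec on an
 * ELLPACK-style layout; the i-outer variant is the ordinary row-at-a-time
 * CSR product and is preferred when nz exceeds the chunk size. */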
433: /* MatMultAdd_SeqCSRPERM() calculates yy = ww + A * xx.
434: * Note that the names I used to designate the vectors differ from those
435: * used in MatMultAdd_SeqAIJ(). I did this to keep my notation consistent
436: * with the MatMult_SeqCSRPERM() routine, which is very similar to this one. */
437: /*
438: I hate having virtually identical code for the mult and the multadd!!!
439: */
442: PetscErrorCode MatMultAdd_SeqCSRPERM(Mat A,Vec xx,Vec ww,Vec yy)
443: {
444: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
445: PetscScalar *x,*y,*w,*aa;
447: PetscInt *aj,*ai;
448: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
449: PetscInt i,j;
450: #endif
452: /* Variables that don't appear in MatMultAdd_SeqAIJ. */
453: Mat_SeqCSRPERM *csrperm;
454: PetscInt *iperm; /* Points to the permutation vector. */
455: PetscInt *xgroup;
456: /* Denotes where groups of rows with same number of nonzeros
457: * begin and end in iperm. */
458: PetscInt *nzgroup;
459: PetscInt ngroup;
460: PetscInt igroup;
461: PetscInt jstart,jend;
462: /* jstart is used in loops to denote the position in iperm where a
463: * group starts; jend denotes the position where it ends.
464: * (jend + 1 is where the next group starts.) */
465: PetscInt iold,nz;
466: PetscInt istart,iend,isize;
467: PetscInt ipos;
468: PetscScalar yp[NDIM];
469: PetscInt ip[NDIM];
470: /* yp[] and ip[] are treated as vector "registers" for performing
471: * the mat-vec. */
473: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
474: #pragma disjoint(*x,*y,*aa)
475: #endif
478: VecGetArray(xx,&x);
479: VecGetArray(yy,&y);
480: if (yy != ww) {
481: VecGetArray(ww,&w);
482: } else {
483: w = y;
484: }
486: aj = a->j; /* aj[k] gives column index for element aa[k]. */
487: aa = a->a; /* Nonzero elements stored row-by-row. */
488: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
490: /* Get the info we need about the permutations and groupings. */
491: csrperm = (Mat_SeqCSRPERM *) A->spptr;
492: iperm = csrperm->iperm;
493: ngroup = csrperm->ngroup;
494: xgroup = csrperm->xgroup;
495: nzgroup = csrperm->nzgroup;
496:
497: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
498: fortranmultaddcsrperm_(&m,x,ii,aj,aa,y,w);
499: #else
501: for(igroup=0; igroup<ngroup; igroup++) {
502: jstart = xgroup[igroup];
503: jend = xgroup[igroup+1] - 1;
505: nz = nzgroup[igroup];
507: /* Handle the special cases where the number of nonzeros per row
508: * in the group is either 0 or 1. */
509: if(nz == 0) {
510: for(i=jstart; i<=jend; i++) {
511: iold = iperm[i];
512: y[iold] = w[iold];
513: }
514: }
515: else if(nz == 1) {
516: for(i=jstart; i<=jend; i++) {
517: iold = iperm[i];
518: ipos = ai[iold];
519: y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
520: }
521: }
522: /* For the general case: */
523: else {
524:
525: /* We work our way through the current group in chunks of NDIM rows
526: * at a time. */
528: for(istart=jstart; istart<=jend; istart+=NDIM) {
529: /* Figure out where the chunk of 'isize' rows ends in iperm.
530: * 'isize' may of course be less than NDIM for the last chunk. */
531: iend = istart + (NDIM - 1);
532: if(iend > jend) { iend = jend; }
533: isize = iend - istart + 1;
535: /* Initialize the yp[] array that will be used to hold part of
536: * the permuted results vector, and figure out where in aa each
537: * row of the chunk will begin. */
538: for(i=0; i<isize; i++) {
539: iold = iperm[istart + i];
540: /* iold is a row number from the matrix A *before* reordering. */
541: ip[i] = ai[iold];
542: /* ip[i] tells us where the ith row of the chunk begins in aa. */
543: yp[i] = w[iold];
544: }
546: /* If the number of nonzeros per row exceeds the number of rows in
547: * the chunk, we should vectorize along nz, that is, perform the
548: * mat-vec one row at a time as in the usual CSR case. */
549: if(nz > isize) {
550: #if defined(PETSC_HAVE_CRAYC)
551: #pragma _CRI preferstream
552: #endif
553: for(i=0; i<isize; i++) {
554: #if defined(PETSC_HAVE_CRAYC)
555: #pragma _CRI prefervector
556: #endif
557: for(j=0; j<nz; j++) {
558: ipos = ip[i] + j;
559: yp[i] += aa[ipos] * x[aj[ipos]];
560: }
561: }
562: }
563: /* Otherwise, there are enough rows in the chunk to make it
564: * worthwhile to vectorize across the rows, that is, to do the
565: * matvec by operating with "columns" of the chunk. */
566: else {
567: for(j=0; j<nz; j++) {
568: for(i=0; i<isize; i++) {
569: ipos = ip[i] + j;
570: yp[i] += aa[ipos] * x[aj[ipos]];
571: }
572: }
573: }
575: #if defined(PETSC_HAVE_CRAYC)
576: #pragma _CRI ivdep
577: #endif
578: /* Put results from yp[] into non-permuted result vector y. */
579: for(i=0; i<isize; i++) {
580: y[iperm[istart+i]] = yp[i];
581: }
582: } /* End processing chunk of isize rows of a group. */
583:
584: } /* End handling matvec for chunk with nz > 1. */
585: } /* End loop over igroup. */
587: #endif
588: PetscLogFlops(2*a->nz - A->rmap.n);
589: VecRestoreArray(xx,&x);
590: VecRestoreArray(yy,&y);
591: if (yy != ww) {
592: VecRestoreArray(ww,&w);
593: }
594: return(0);
595: }
598: /* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a
599: * SeqCSRPERM matrix. This routine is called by the MatCreate_SeqCSRPERM()
600: * routine, but can also be used to convert an assembled SeqAIJ matrix
601: * into a SeqCSRPERM one. */
605: PetscErrorCode MatConvert_SeqAIJ_SeqCSRPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat)
606: {
608: Mat B = *newmat;
609: Mat_SeqCSRPERM *csrperm;
612: if (reuse == MAT_INITIAL_MATRIX) {
613: MatDuplicate(A,MAT_COPY_VALUES,&B);
614: }
616: PetscNew(Mat_SeqCSRPERM,&csrperm);
617: B->spptr = (void *) csrperm;
619: csrperm->AssemblyEnd_SeqAIJ = A->ops->assemblyend;
620: csrperm->MatDestroy_SeqAIJ = A->ops->destroy;
621: csrperm->MatDuplicate_SeqAIJ = A->ops->duplicate;
623: /* Set function pointers for methods that we inherit from AIJ but override. */
624: B->ops->duplicate = MatDuplicate_SeqCSRPERM;
625: B->ops->assemblyend = MatAssemblyEnd_SeqCSRPERM;
626: B->ops->destroy = MatDestroy_SeqCSRPERM;
627: B->ops->mult = MatMult_SeqCSRPERM;
628: B->ops->multadd = MatMultAdd_SeqCSRPERM;
630: /* If A has already been assembled, compute the permutation. */
631: if (A->assembled == PETSC_TRUE) {
632: SeqCSRPERM_create_perm(B);
633: }
634:
635: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqcsrperm_seqaij_C",
636: "MatConvert_SeqCSRPERM_SeqAIJ",MatConvert_SeqCSRPERM_SeqAIJ);
638: PetscObjectChangeTypeName((PetscObject)B,MATSEQCSRPERM);
639: *newmat = B;
640: return(0);
641: }
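/* A minimal usage sketch (assuming an already-assembled SEQAIJ matrix A and
 * vectors x and y of compatible sizes; error checking omitted): an existing
 * matrix can be switched to this format with MatConvert(), e.g.
 *
 *   Mat Aperm;
 *   MatConvert(A, MATSEQCSRPERM, MAT_INITIAL_MATRIX, &Aperm);
 *   MatMult(Aperm, x, y);    // dispatches to MatMult_SeqCSRPERM()
 *   MatDestroy(Aperm);
 */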
647: /*@C
648: MatCreateSeqCSRPERM - Creates a sparse matrix of type SEQCSRPERM.
649: This type inherits from AIJ, but calculates some additional permutation
650: information that is used to allow better vectorization of some
651: operations. At the cost of increased storage, the AIJ formatted
652: matrix can be copied to a format in which pieces of the matrix are
653: stored in ELLPACK format, allowing the vectorized matrix multiply
654: routine to use stride-1 memory accesses. As with the AIJ type, it is
655: important to preallocate matrix storage in order to get good assembly
656: performance.
657:
658: Collective on MPI_Comm
660: Input Parameters:
661: + comm - MPI communicator, set to PETSC_COMM_SELF
662: . m - number of rows
663: . n - number of columns
664: . nz - number of nonzeros per row (same for all rows)
665: - nnz - array containing the number of nonzeros in the various rows
666: (possibly different for each row) or PETSC_NULL
668: Output Parameter:
669: . A - the matrix
671: Notes:
672: If nnz is given then nz is ignored
674: Level: intermediate
676: .keywords: matrix, cray, sparse, parallel
678: .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
679: @*/
680: PetscErrorCode MatCreateSeqCSRPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
681: {
685: MatCreate(comm,A);
686: MatSetSizes(*A,m,n,m,n);
687: MatSetType(*A,MATSEQCSRPERM);
688: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
689: return(0);
690: }
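/* A minimal sketch of creating a SEQCSRPERM matrix directly with the routine
 * above (sizes and the preallocation value are illustrative only; the
 * MatSetValues() loop and error checking are omitted):
 *
 *   Mat      A;
 *   PetscInt n = 100;
 *   MatCreateSeqCSRPERM(PETSC_COMM_SELF, n, n, 5, PETSC_NULL, &A);
 *   // ... MatSetValues() calls to fill in the entries ...
 *   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);   // also builds the permutation info
 *   // ... MatMult(A, x, y), solves, etc. ...
 *   MatDestroy(A);
 */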
696: PetscErrorCode MatCreate_SeqCSRPERM(Mat A)
697: {
701: MatSetType(A,MATSEQAIJ);
702: MatConvert_SeqAIJ_SeqCSRPERM(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
703: return(0);
704: }