Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateSeqDense
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNew(Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->factor = 0;
162: A->mapping = 0;
164: b->AIJ = 0;
165: b->dof = 0;
166: b->OAIJ = 0;
167: b->ctx = 0;
168: b->w = 0;
169: MPI_Comm_size(A->comm,&size);
170: if (size == 1){
171: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172: } else {
173: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174: }
175: return(0);
176: }
179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,sum1, sum2;
188: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
189: PetscInt n,i,jrow,j;
192: VecGetArray(xx,&x);
193: VecGetArray(yy,&y);
194: idx = a->j;
195: v = a->a;
196: ii = a->i;
198: for (i=0; i<m; i++) {
199: jrow = ii[i];
200: n = ii[i+1] - jrow;
201: sum1 = 0.0;
202: sum2 = 0.0;
203: for (j=0; j<n; j++) {
204: sum1 += v[jrow]*x[2*idx[jrow]];
205: sum2 += v[jrow]*x[2*idx[jrow]+1];
206: jrow++;
207: }
208: y[2*i] = sum1;
209: y[2*i+1] = sum2;
210: }
212: PetscLogFlops(4*a->nz - 2*m);
213: VecRestoreArray(xx,&x);
214: VecRestoreArray(yy,&y);
215: return(0);
216: }
220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
224: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
226: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
229: VecSet(yy,zero);
230: VecGetArray(xx,&x);
231: VecGetArray(yy,&y);
232:
233: for (i=0; i<m; i++) {
234: idx = a->j + a->i[i] ;
235: v = a->a + a->i[i] ;
236: n = a->i[i+1] - a->i[i];
237: alpha1 = x[2*i];
238: alpha2 = x[2*i+1];
239: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240: }
241: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
242: VecRestoreArray(xx,&x);
243: VecRestoreArray(yy,&y);
244: return(0);
245: }
249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
252: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
253: PetscScalar *x,*y,*v,sum1, sum2;
255: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
256: PetscInt n,i,jrow,j;
259: if (yy != zz) {VecCopy(yy,zz);}
260: VecGetArray(xx,&x);
261: VecGetArray(zz,&y);
262: idx = a->j;
263: v = a->a;
264: ii = a->i;
266: for (i=0; i<m; i++) {
267: jrow = ii[i];
268: n = ii[i+1] - jrow;
269: sum1 = 0.0;
270: sum2 = 0.0;
271: for (j=0; j<n; j++) {
272: sum1 += v[jrow]*x[2*idx[jrow]];
273: sum2 += v[jrow]*x[2*idx[jrow]+1];
274: jrow++;
275: }
276: y[2*i] += sum1;
277: y[2*i+1] += sum2;
278: }
280: PetscLogFlops(4*a->nz - 2*m);
281: VecRestoreArray(xx,&x);
282: VecRestoreArray(zz,&y);
283: return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
291: PetscScalar *x,*y,*v,alpha1,alpha2;
293: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
296: if (yy != zz) {VecCopy(yy,zz);}
297: VecGetArray(xx,&x);
298: VecGetArray(zz,&y);
299:
300: for (i=0; i<m; i++) {
301: idx = a->j + a->i[i] ;
302: v = a->a + a->i[i] ;
303: n = a->i[i+1] - a->i[i];
304: alpha1 = x[2*i];
305: alpha2 = x[2*i+1];
306: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307: }
308: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
309: VecRestoreArray(xx,&x);
310: VecRestoreArray(zz,&y);
311: return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
319: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
320: PetscScalar *x,*y,*v,sum1, sum2, sum3;
322: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
323: PetscInt n,i,jrow,j;
326: VecGetArray(xx,&x);
327: VecGetArray(yy,&y);
328: idx = a->j;
329: v = a->a;
330: ii = a->i;
332: for (i=0; i<m; i++) {
333: jrow = ii[i];
334: n = ii[i+1] - jrow;
335: sum1 = 0.0;
336: sum2 = 0.0;
337: sum3 = 0.0;
338: for (j=0; j<n; j++) {
339: sum1 += v[jrow]*x[3*idx[jrow]];
340: sum2 += v[jrow]*x[3*idx[jrow]+1];
341: sum3 += v[jrow]*x[3*idx[jrow]+2];
342: jrow++;
343: }
344: y[3*i] = sum1;
345: y[3*i+1] = sum2;
346: y[3*i+2] = sum3;
347: }
349: PetscLogFlops(6*a->nz - 3*m);
350: VecRestoreArray(xx,&x);
351: VecRestoreArray(yy,&y);
352: return(0);
353: }
357: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358: {
359: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
360: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
361: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
363: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
366: VecSet(yy,zero);
367: VecGetArray(xx,&x);
368: VecGetArray(yy,&y);
369:
370: for (i=0; i<m; i++) {
371: idx = a->j + a->i[i];
372: v = a->a + a->i[i];
373: n = a->i[i+1] - a->i[i];
374: alpha1 = x[3*i];
375: alpha2 = x[3*i+1];
376: alpha3 = x[3*i+2];
377: while (n-->0) {
378: y[3*(*idx)] += alpha1*(*v);
379: y[3*(*idx)+1] += alpha2*(*v);
380: y[3*(*idx)+2] += alpha3*(*v);
381: idx++; v++;
382: }
383: }
384: PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
385: VecRestoreArray(xx,&x);
386: VecRestoreArray(yy,&y);
387: return(0);
388: }
392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
395: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
396: PetscScalar *x,*y,*v,sum1, sum2, sum3;
398: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
399: PetscInt n,i,jrow,j;
402: if (yy != zz) {VecCopy(yy,zz);}
403: VecGetArray(xx,&x);
404: VecGetArray(zz,&y);
405: idx = a->j;
406: v = a->a;
407: ii = a->i;
409: for (i=0; i<m; i++) {
410: jrow = ii[i];
411: n = ii[i+1] - jrow;
412: sum1 = 0.0;
413: sum2 = 0.0;
414: sum3 = 0.0;
415: for (j=0; j<n; j++) {
416: sum1 += v[jrow]*x[3*idx[jrow]];
417: sum2 += v[jrow]*x[3*idx[jrow]+1];
418: sum3 += v[jrow]*x[3*idx[jrow]+2];
419: jrow++;
420: }
421: y[3*i] += sum1;
422: y[3*i+1] += sum2;
423: y[3*i+2] += sum3;
424: }
426: PetscLogFlops(6*a->nz);
427: VecRestoreArray(xx,&x);
428: VecRestoreArray(zz,&y);
429: return(0);
430: }
433: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434: {
435: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
436: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
437: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
439: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
442: if (yy != zz) {VecCopy(yy,zz);}
443: VecGetArray(xx,&x);
444: VecGetArray(zz,&y);
445: for (i=0; i<m; i++) {
446: idx = a->j + a->i[i] ;
447: v = a->a + a->i[i] ;
448: n = a->i[i+1] - a->i[i];
449: alpha1 = x[3*i];
450: alpha2 = x[3*i+1];
451: alpha3 = x[3*i+2];
452: while (n-->0) {
453: y[3*(*idx)] += alpha1*(*v);
454: y[3*(*idx)+1] += alpha2*(*v);
455: y[3*(*idx)+2] += alpha3*(*v);
456: idx++; v++;
457: }
458: }
459: PetscLogFlops(6*a->nz);
460: VecRestoreArray(xx,&x);
461: VecRestoreArray(zz,&y);
462: return(0);
463: }
465: /* ------------------------------------------------------------------------------*/
468: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469: {
470: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
471: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
472: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
474: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
475: PetscInt n,i,jrow,j;
478: VecGetArray(xx,&x);
479: VecGetArray(yy,&y);
480: idx = a->j;
481: v = a->a;
482: ii = a->i;
484: for (i=0; i<m; i++) {
485: jrow = ii[i];
486: n = ii[i+1] - jrow;
487: sum1 = 0.0;
488: sum2 = 0.0;
489: sum3 = 0.0;
490: sum4 = 0.0;
491: for (j=0; j<n; j++) {
492: sum1 += v[jrow]*x[4*idx[jrow]];
493: sum2 += v[jrow]*x[4*idx[jrow]+1];
494: sum3 += v[jrow]*x[4*idx[jrow]+2];
495: sum4 += v[jrow]*x[4*idx[jrow]+3];
496: jrow++;
497: }
498: y[4*i] = sum1;
499: y[4*i+1] = sum2;
500: y[4*i+2] = sum3;
501: y[4*i+3] = sum4;
502: }
504: PetscLogFlops(8*a->nz - 4*m);
505: VecRestoreArray(xx,&x);
506: VecRestoreArray(yy,&y);
507: return(0);
508: }
512: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513: {
514: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
515: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
516: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
518: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
521: VecSet(yy,zero);
522: VecGetArray(xx,&x);
523: VecGetArray(yy,&y);
524: for (i=0; i<m; i++) {
525: idx = a->j + a->i[i] ;
526: v = a->a + a->i[i] ;
527: n = a->i[i+1] - a->i[i];
528: alpha1 = x[4*i];
529: alpha2 = x[4*i+1];
530: alpha3 = x[4*i+2];
531: alpha4 = x[4*i+3];
532: while (n-->0) {
533: y[4*(*idx)] += alpha1*(*v);
534: y[4*(*idx)+1] += alpha2*(*v);
535: y[4*(*idx)+2] += alpha3*(*v);
536: y[4*(*idx)+3] += alpha4*(*v);
537: idx++; v++;
538: }
539: }
540: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
541: VecRestoreArray(xx,&x);
542: VecRestoreArray(yy,&y);
543: return(0);
544: }
548: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549: {
550: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
551: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
552: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
554: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
555: PetscInt n,i,jrow,j;
558: if (yy != zz) {VecCopy(yy,zz);}
559: VecGetArray(xx,&x);
560: VecGetArray(zz,&y);
561: idx = a->j;
562: v = a->a;
563: ii = a->i;
565: for (i=0; i<m; i++) {
566: jrow = ii[i];
567: n = ii[i+1] - jrow;
568: sum1 = 0.0;
569: sum2 = 0.0;
570: sum3 = 0.0;
571: sum4 = 0.0;
572: for (j=0; j<n; j++) {
573: sum1 += v[jrow]*x[4*idx[jrow]];
574: sum2 += v[jrow]*x[4*idx[jrow]+1];
575: sum3 += v[jrow]*x[4*idx[jrow]+2];
576: sum4 += v[jrow]*x[4*idx[jrow]+3];
577: jrow++;
578: }
579: y[4*i] += sum1;
580: y[4*i+1] += sum2;
581: y[4*i+2] += sum3;
582: y[4*i+3] += sum4;
583: }
585: PetscLogFlops(8*a->nz - 4*m);
586: VecRestoreArray(xx,&x);
587: VecRestoreArray(zz,&y);
588: return(0);
589: }
592: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593: {
594: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
595: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
596: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
598: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
601: if (yy != zz) {VecCopy(yy,zz);}
602: VecGetArray(xx,&x);
603: VecGetArray(zz,&y);
604:
605: for (i=0; i<m; i++) {
606: idx = a->j + a->i[i] ;
607: v = a->a + a->i[i] ;
608: n = a->i[i+1] - a->i[i];
609: alpha1 = x[4*i];
610: alpha2 = x[4*i+1];
611: alpha3 = x[4*i+2];
612: alpha4 = x[4*i+3];
613: while (n-->0) {
614: y[4*(*idx)] += alpha1*(*v);
615: y[4*(*idx)+1] += alpha2*(*v);
616: y[4*(*idx)+2] += alpha3*(*v);
617: y[4*(*idx)+3] += alpha4*(*v);
618: idx++; v++;
619: }
620: }
621: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
622: VecRestoreArray(xx,&x);
623: VecRestoreArray(zz,&y);
624: return(0);
625: }
626: /* ------------------------------------------------------------------------------*/
630: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631: {
632: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
633: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
634: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
636: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
637: PetscInt n,i,jrow,j;
640: VecGetArray(xx,&x);
641: VecGetArray(yy,&y);
642: idx = a->j;
643: v = a->a;
644: ii = a->i;
646: for (i=0; i<m; i++) {
647: jrow = ii[i];
648: n = ii[i+1] - jrow;
649: sum1 = 0.0;
650: sum2 = 0.0;
651: sum3 = 0.0;
652: sum4 = 0.0;
653: sum5 = 0.0;
654: for (j=0; j<n; j++) {
655: sum1 += v[jrow]*x[5*idx[jrow]];
656: sum2 += v[jrow]*x[5*idx[jrow]+1];
657: sum3 += v[jrow]*x[5*idx[jrow]+2];
658: sum4 += v[jrow]*x[5*idx[jrow]+3];
659: sum5 += v[jrow]*x[5*idx[jrow]+4];
660: jrow++;
661: }
662: y[5*i] = sum1;
663: y[5*i+1] = sum2;
664: y[5*i+2] = sum3;
665: y[5*i+3] = sum4;
666: y[5*i+4] = sum5;
667: }
669: PetscLogFlops(10*a->nz - 5*m);
670: VecRestoreArray(xx,&x);
671: VecRestoreArray(yy,&y);
672: return(0);
673: }
677: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678: {
679: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
680: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
681: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
683: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
686: VecSet(yy,zero);
687: VecGetArray(xx,&x);
688: VecGetArray(yy,&y);
689:
690: for (i=0; i<m; i++) {
691: idx = a->j + a->i[i] ;
692: v = a->a + a->i[i] ;
693: n = a->i[i+1] - a->i[i];
694: alpha1 = x[5*i];
695: alpha2 = x[5*i+1];
696: alpha3 = x[5*i+2];
697: alpha4 = x[5*i+3];
698: alpha5 = x[5*i+4];
699: while (n-->0) {
700: y[5*(*idx)] += alpha1*(*v);
701: y[5*(*idx)+1] += alpha2*(*v);
702: y[5*(*idx)+2] += alpha3*(*v);
703: y[5*(*idx)+3] += alpha4*(*v);
704: y[5*(*idx)+4] += alpha5*(*v);
705: idx++; v++;
706: }
707: }
708: PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
709: VecRestoreArray(xx,&x);
710: VecRestoreArray(yy,&y);
711: return(0);
712: }
716: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717: {
718: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
719: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
720: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
722: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
723: PetscInt n,i,jrow,j;
726: if (yy != zz) {VecCopy(yy,zz);}
727: VecGetArray(xx,&x);
728: VecGetArray(zz,&y);
729: idx = a->j;
730: v = a->a;
731: ii = a->i;
733: for (i=0; i<m; i++) {
734: jrow = ii[i];
735: n = ii[i+1] - jrow;
736: sum1 = 0.0;
737: sum2 = 0.0;
738: sum3 = 0.0;
739: sum4 = 0.0;
740: sum5 = 0.0;
741: for (j=0; j<n; j++) {
742: sum1 += v[jrow]*x[5*idx[jrow]];
743: sum2 += v[jrow]*x[5*idx[jrow]+1];
744: sum3 += v[jrow]*x[5*idx[jrow]+2];
745: sum4 += v[jrow]*x[5*idx[jrow]+3];
746: sum5 += v[jrow]*x[5*idx[jrow]+4];
747: jrow++;
748: }
749: y[5*i] += sum1;
750: y[5*i+1] += sum2;
751: y[5*i+2] += sum3;
752: y[5*i+3] += sum4;
753: y[5*i+4] += sum5;
754: }
756: PetscLogFlops(10*a->nz);
757: VecRestoreArray(xx,&x);
758: VecRestoreArray(zz,&y);
759: return(0);
760: }
764: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765: {
766: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
767: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
768: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
770: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
773: if (yy != zz) {VecCopy(yy,zz);}
774: VecGetArray(xx,&x);
775: VecGetArray(zz,&y);
776:
777: for (i=0; i<m; i++) {
778: idx = a->j + a->i[i] ;
779: v = a->a + a->i[i] ;
780: n = a->i[i+1] - a->i[i];
781: alpha1 = x[5*i];
782: alpha2 = x[5*i+1];
783: alpha3 = x[5*i+2];
784: alpha4 = x[5*i+3];
785: alpha5 = x[5*i+4];
786: while (n-->0) {
787: y[5*(*idx)] += alpha1*(*v);
788: y[5*(*idx)+1] += alpha2*(*v);
789: y[5*(*idx)+2] += alpha3*(*v);
790: y[5*(*idx)+3] += alpha4*(*v);
791: y[5*(*idx)+4] += alpha5*(*v);
792: idx++; v++;
793: }
794: }
795: PetscLogFlops(10*a->nz);
796: VecRestoreArray(xx,&x);
797: VecRestoreArray(zz,&y);
798: return(0);
799: }
801: /* ------------------------------------------------------------------------------*/
804: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805: {
806: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
807: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
808: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
810: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
811: PetscInt n,i,jrow,j;
814: VecGetArray(xx,&x);
815: VecGetArray(yy,&y);
816: idx = a->j;
817: v = a->a;
818: ii = a->i;
820: for (i=0; i<m; i++) {
821: jrow = ii[i];
822: n = ii[i+1] - jrow;
823: sum1 = 0.0;
824: sum2 = 0.0;
825: sum3 = 0.0;
826: sum4 = 0.0;
827: sum5 = 0.0;
828: sum6 = 0.0;
829: for (j=0; j<n; j++) {
830: sum1 += v[jrow]*x[6*idx[jrow]];
831: sum2 += v[jrow]*x[6*idx[jrow]+1];
832: sum3 += v[jrow]*x[6*idx[jrow]+2];
833: sum4 += v[jrow]*x[6*idx[jrow]+3];
834: sum5 += v[jrow]*x[6*idx[jrow]+4];
835: sum6 += v[jrow]*x[6*idx[jrow]+5];
836: jrow++;
837: }
838: y[6*i] = sum1;
839: y[6*i+1] = sum2;
840: y[6*i+2] = sum3;
841: y[6*i+3] = sum4;
842: y[6*i+4] = sum5;
843: y[6*i+5] = sum6;
844: }
846: PetscLogFlops(12*a->nz - 6*m);
847: VecRestoreArray(xx,&x);
848: VecRestoreArray(yy,&y);
849: return(0);
850: }
854: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855: {
856: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
857: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
858: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
860: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
863: VecSet(yy,zero);
864: VecGetArray(xx,&x);
865: VecGetArray(yy,&y);
867: for (i=0; i<m; i++) {
868: idx = a->j + a->i[i] ;
869: v = a->a + a->i[i] ;
870: n = a->i[i+1] - a->i[i];
871: alpha1 = x[6*i];
872: alpha2 = x[6*i+1];
873: alpha3 = x[6*i+2];
874: alpha4 = x[6*i+3];
875: alpha5 = x[6*i+4];
876: alpha6 = x[6*i+5];
877: while (n-->0) {
878: y[6*(*idx)] += alpha1*(*v);
879: y[6*(*idx)+1] += alpha2*(*v);
880: y[6*(*idx)+2] += alpha3*(*v);
881: y[6*(*idx)+3] += alpha4*(*v);
882: y[6*(*idx)+4] += alpha5*(*v);
883: y[6*(*idx)+5] += alpha6*(*v);
884: idx++; v++;
885: }
886: }
887: PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
888: VecRestoreArray(xx,&x);
889: VecRestoreArray(yy,&y);
890: return(0);
891: }
895: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896: {
897: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
898: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
899: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
901: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
902: PetscInt n,i,jrow,j;
905: if (yy != zz) {VecCopy(yy,zz);}
906: VecGetArray(xx,&x);
907: VecGetArray(zz,&y);
908: idx = a->j;
909: v = a->a;
910: ii = a->i;
912: for (i=0; i<m; i++) {
913: jrow = ii[i];
914: n = ii[i+1] - jrow;
915: sum1 = 0.0;
916: sum2 = 0.0;
917: sum3 = 0.0;
918: sum4 = 0.0;
919: sum5 = 0.0;
920: sum6 = 0.0;
921: for (j=0; j<n; j++) {
922: sum1 += v[jrow]*x[6*idx[jrow]];
923: sum2 += v[jrow]*x[6*idx[jrow]+1];
924: sum3 += v[jrow]*x[6*idx[jrow]+2];
925: sum4 += v[jrow]*x[6*idx[jrow]+3];
926: sum5 += v[jrow]*x[6*idx[jrow]+4];
927: sum6 += v[jrow]*x[6*idx[jrow]+5];
928: jrow++;
929: }
930: y[6*i] += sum1;
931: y[6*i+1] += sum2;
932: y[6*i+2] += sum3;
933: y[6*i+3] += sum4;
934: y[6*i+4] += sum5;
935: y[6*i+5] += sum6;
936: }
938: PetscLogFlops(12*a->nz);
939: VecRestoreArray(xx,&x);
940: VecRestoreArray(zz,&y);
941: return(0);
942: }
946: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
949: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
950: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
952: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
955: if (yy != zz) {VecCopy(yy,zz);}
956: VecGetArray(xx,&x);
957: VecGetArray(zz,&y);
958:
959: for (i=0; i<m; i++) {
960: idx = a->j + a->i[i] ;
961: v = a->a + a->i[i] ;
962: n = a->i[i+1] - a->i[i];
963: alpha1 = x[6*i];
964: alpha2 = x[6*i+1];
965: alpha3 = x[6*i+2];
966: alpha4 = x[6*i+3];
967: alpha5 = x[6*i+4];
968: alpha6 = x[6*i+5];
969: while (n-->0) {
970: y[6*(*idx)] += alpha1*(*v);
971: y[6*(*idx)+1] += alpha2*(*v);
972: y[6*(*idx)+2] += alpha3*(*v);
973: y[6*(*idx)+3] += alpha4*(*v);
974: y[6*(*idx)+4] += alpha5*(*v);
975: y[6*(*idx)+5] += alpha6*(*v);
976: idx++; v++;
977: }
978: }
979: PetscLogFlops(12*a->nz);
980: VecRestoreArray(xx,&x);
981: VecRestoreArray(zz,&y);
982: return(0);
983: }
985: /* ------------------------------------------------------------------------------*/
988: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989: {
990: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
991: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
992: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
995: PetscInt n,i,jrow,j;
998: VecGetArray(xx,&x);
999: VecGetArray(yy,&y);
1000: idx = a->j;
1001: v = a->a;
1002: ii = a->i;
1004: for (i=0; i<m; i++) {
1005: jrow = ii[i];
1006: n = ii[i+1] - jrow;
1007: sum1 = 0.0;
1008: sum2 = 0.0;
1009: sum3 = 0.0;
1010: sum4 = 0.0;
1011: sum5 = 0.0;
1012: sum6 = 0.0;
1013: sum7 = 0.0;
1014: for (j=0; j<n; j++) {
1015: sum1 += v[jrow]*x[7*idx[jrow]];
1016: sum2 += v[jrow]*x[7*idx[jrow]+1];
1017: sum3 += v[jrow]*x[7*idx[jrow]+2];
1018: sum4 += v[jrow]*x[7*idx[jrow]+3];
1019: sum5 += v[jrow]*x[7*idx[jrow]+4];
1020: sum6 += v[jrow]*x[7*idx[jrow]+5];
1021: sum7 += v[jrow]*x[7*idx[jrow]+6];
1022: jrow++;
1023: }
1024: y[7*i] = sum1;
1025: y[7*i+1] = sum2;
1026: y[7*i+2] = sum3;
1027: y[7*i+3] = sum4;
1028: y[7*i+4] = sum5;
1029: y[7*i+5] = sum6;
1030: y[7*i+6] = sum7;
1031: }
1033: PetscLogFlops(14*a->nz - 7*m);
1034: VecRestoreArray(xx,&x);
1035: VecRestoreArray(yy,&y);
1036: return(0);
1037: }
1041: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042: {
1043: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1044: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1045: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1047: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1050: VecSet(yy,zero);
1051: VecGetArray(xx,&x);
1052: VecGetArray(yy,&y);
1054: for (i=0; i<m; i++) {
1055: idx = a->j + a->i[i] ;
1056: v = a->a + a->i[i] ;
1057: n = a->i[i+1] - a->i[i];
1058: alpha1 = x[7*i];
1059: alpha2 = x[7*i+1];
1060: alpha3 = x[7*i+2];
1061: alpha4 = x[7*i+3];
1062: alpha5 = x[7*i+4];
1063: alpha6 = x[7*i+5];
1064: alpha7 = x[7*i+6];
1065: while (n-->0) {
1066: y[7*(*idx)] += alpha1*(*v);
1067: y[7*(*idx)+1] += alpha2*(*v);
1068: y[7*(*idx)+2] += alpha3*(*v);
1069: y[7*(*idx)+3] += alpha4*(*v);
1070: y[7*(*idx)+4] += alpha5*(*v);
1071: y[7*(*idx)+5] += alpha6*(*v);
1072: y[7*(*idx)+6] += alpha7*(*v);
1073: idx++; v++;
1074: }
1075: }
1076: PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1077: VecRestoreArray(xx,&x);
1078: VecRestoreArray(yy,&y);
1079: return(0);
1080: }
1084: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085: {
1086: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1087: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1088: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1090: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1091: PetscInt n,i,jrow,j;
1094: if (yy != zz) {VecCopy(yy,zz);}
1095: VecGetArray(xx,&x);
1096: VecGetArray(zz,&y);
1097: idx = a->j;
1098: v = a->a;
1099: ii = a->i;
1101: for (i=0; i<m; i++) {
1102: jrow = ii[i];
1103: n = ii[i+1] - jrow;
1104: sum1 = 0.0;
1105: sum2 = 0.0;
1106: sum3 = 0.0;
1107: sum4 = 0.0;
1108: sum5 = 0.0;
1109: sum6 = 0.0;
1110: sum7 = 0.0;
1111: for (j=0; j<n; j++) {
1112: sum1 += v[jrow]*x[7*idx[jrow]];
1113: sum2 += v[jrow]*x[7*idx[jrow]+1];
1114: sum3 += v[jrow]*x[7*idx[jrow]+2];
1115: sum4 += v[jrow]*x[7*idx[jrow]+3];
1116: sum5 += v[jrow]*x[7*idx[jrow]+4];
1117: sum6 += v[jrow]*x[7*idx[jrow]+5];
1118: sum7 += v[jrow]*x[7*idx[jrow]+6];
1119: jrow++;
1120: }
1121: y[7*i] += sum1;
1122: y[7*i+1] += sum2;
1123: y[7*i+2] += sum3;
1124: y[7*i+3] += sum4;
1125: y[7*i+4] += sum5;
1126: y[7*i+5] += sum6;
1127: y[7*i+6] += sum7;
1128: }
1130: PetscLogFlops(14*a->nz);
1131: VecRestoreArray(xx,&x);
1132: VecRestoreArray(zz,&y);
1133: return(0);
1134: }
1138: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139: {
1140: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1141: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1142: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1144: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1147: if (yy != zz) {VecCopy(yy,zz);}
1148: VecGetArray(xx,&x);
1149: VecGetArray(zz,&y);
1150: for (i=0; i<m; i++) {
1151: idx = a->j + a->i[i] ;
1152: v = a->a + a->i[i] ;
1153: n = a->i[i+1] - a->i[i];
1154: alpha1 = x[7*i];
1155: alpha2 = x[7*i+1];
1156: alpha3 = x[7*i+2];
1157: alpha4 = x[7*i+3];
1158: alpha5 = x[7*i+4];
1159: alpha6 = x[7*i+5];
1160: alpha7 = x[7*i+6];
1161: while (n-->0) {
1162: y[7*(*idx)] += alpha1*(*v);
1163: y[7*(*idx)+1] += alpha2*(*v);
1164: y[7*(*idx)+2] += alpha3*(*v);
1165: y[7*(*idx)+3] += alpha4*(*v);
1166: y[7*(*idx)+4] += alpha5*(*v);
1167: y[7*(*idx)+5] += alpha6*(*v);
1168: y[7*(*idx)+6] += alpha7*(*v);
1169: idx++; v++;
1170: }
1171: }
1172: PetscLogFlops(14*a->nz);
1173: VecRestoreArray(xx,&x);
1174: VecRestoreArray(zz,&y);
1175: return(0);
1176: }
1180: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181: {
1182: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1183: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1184: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1186: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1187: PetscInt n,i,jrow,j;
1190: VecGetArray(xx,&x);
1191: VecGetArray(yy,&y);
1192: idx = a->j;
1193: v = a->a;
1194: ii = a->i;
1196: for (i=0; i<m; i++) {
1197: jrow = ii[i];
1198: n = ii[i+1] - jrow;
1199: sum1 = 0.0;
1200: sum2 = 0.0;
1201: sum3 = 0.0;
1202: sum4 = 0.0;
1203: sum5 = 0.0;
1204: sum6 = 0.0;
1205: sum7 = 0.0;
1206: sum8 = 0.0;
1207: for (j=0; j<n; j++) {
1208: sum1 += v[jrow]*x[8*idx[jrow]];
1209: sum2 += v[jrow]*x[8*idx[jrow]+1];
1210: sum3 += v[jrow]*x[8*idx[jrow]+2];
1211: sum4 += v[jrow]*x[8*idx[jrow]+3];
1212: sum5 += v[jrow]*x[8*idx[jrow]+4];
1213: sum6 += v[jrow]*x[8*idx[jrow]+5];
1214: sum7 += v[jrow]*x[8*idx[jrow]+6];
1215: sum8 += v[jrow]*x[8*idx[jrow]+7];
1216: jrow++;
1217: }
1218: y[8*i] = sum1;
1219: y[8*i+1] = sum2;
1220: y[8*i+2] = sum3;
1221: y[8*i+3] = sum4;
1222: y[8*i+4] = sum5;
1223: y[8*i+5] = sum6;
1224: y[8*i+6] = sum7;
1225: y[8*i+7] = sum8;
1226: }
1228: PetscLogFlops(16*a->nz - 8*m);
1229: VecRestoreArray(xx,&x);
1230: VecRestoreArray(yy,&y);
1231: return(0);
1232: }
1236: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237: {
1238: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1239: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1240: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1242: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1245: VecSet(yy,zero);
1246: VecGetArray(xx,&x);
1247: VecGetArray(yy,&y);
1249: for (i=0; i<m; i++) {
1250: idx = a->j + a->i[i] ;
1251: v = a->a + a->i[i] ;
1252: n = a->i[i+1] - a->i[i];
1253: alpha1 = x[8*i];
1254: alpha2 = x[8*i+1];
1255: alpha3 = x[8*i+2];
1256: alpha4 = x[8*i+3];
1257: alpha5 = x[8*i+4];
1258: alpha6 = x[8*i+5];
1259: alpha7 = x[8*i+6];
1260: alpha8 = x[8*i+7];
1261: while (n-->0) {
1262: y[8*(*idx)] += alpha1*(*v);
1263: y[8*(*idx)+1] += alpha2*(*v);
1264: y[8*(*idx)+2] += alpha3*(*v);
1265: y[8*(*idx)+3] += alpha4*(*v);
1266: y[8*(*idx)+4] += alpha5*(*v);
1267: y[8*(*idx)+5] += alpha6*(*v);
1268: y[8*(*idx)+6] += alpha7*(*v);
1269: y[8*(*idx)+7] += alpha8*(*v);
1270: idx++; v++;
1271: }
1272: }
1273: PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1274: VecRestoreArray(xx,&x);
1275: VecRestoreArray(yy,&y);
1276: return(0);
1277: }
1281: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282: {
1283: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1284: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1285: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1287: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1288: PetscInt n,i,jrow,j;
1291: if (yy != zz) {VecCopy(yy,zz);}
1292: VecGetArray(xx,&x);
1293: VecGetArray(zz,&y);
1294: idx = a->j;
1295: v = a->a;
1296: ii = a->i;
1298: for (i=0; i<m; i++) {
1299: jrow = ii[i];
1300: n = ii[i+1] - jrow;
1301: sum1 = 0.0;
1302: sum2 = 0.0;
1303: sum3 = 0.0;
1304: sum4 = 0.0;
1305: sum5 = 0.0;
1306: sum6 = 0.0;
1307: sum7 = 0.0;
1308: sum8 = 0.0;
1309: for (j=0; j<n; j++) {
1310: sum1 += v[jrow]*x[8*idx[jrow]];
1311: sum2 += v[jrow]*x[8*idx[jrow]+1];
1312: sum3 += v[jrow]*x[8*idx[jrow]+2];
1313: sum4 += v[jrow]*x[8*idx[jrow]+3];
1314: sum5 += v[jrow]*x[8*idx[jrow]+4];
1315: sum6 += v[jrow]*x[8*idx[jrow]+5];
1316: sum7 += v[jrow]*x[8*idx[jrow]+6];
1317: sum8 += v[jrow]*x[8*idx[jrow]+7];
1318: jrow++;
1319: }
1320: y[8*i] += sum1;
1321: y[8*i+1] += sum2;
1322: y[8*i+2] += sum3;
1323: y[8*i+3] += sum4;
1324: y[8*i+4] += sum5;
1325: y[8*i+5] += sum6;
1326: y[8*i+6] += sum7;
1327: y[8*i+7] += sum8;
1328: }
1330: PetscLogFlops(16*a->nz);
1331: VecRestoreArray(xx,&x);
1332: VecRestoreArray(zz,&y);
1333: return(0);
1334: }
1338: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339: {
1340: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1341: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1342: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1344: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1347: if (yy != zz) {VecCopy(yy,zz);}
1348: VecGetArray(xx,&x);
1349: VecGetArray(zz,&y);
1350: for (i=0; i<m; i++) {
1351: idx = a->j + a->i[i] ;
1352: v = a->a + a->i[i] ;
1353: n = a->i[i+1] - a->i[i];
1354: alpha1 = x[8*i];
1355: alpha2 = x[8*i+1];
1356: alpha3 = x[8*i+2];
1357: alpha4 = x[8*i+3];
1358: alpha5 = x[8*i+4];
1359: alpha6 = x[8*i+5];
1360: alpha7 = x[8*i+6];
1361: alpha8 = x[8*i+7];
1362: while (n-->0) {
1363: y[8*(*idx)] += alpha1*(*v);
1364: y[8*(*idx)+1] += alpha2*(*v);
1365: y[8*(*idx)+2] += alpha3*(*v);
1366: y[8*(*idx)+3] += alpha4*(*v);
1367: y[8*(*idx)+4] += alpha5*(*v);
1368: y[8*(*idx)+5] += alpha6*(*v);
1369: y[8*(*idx)+6] += alpha7*(*v);
1370: y[8*(*idx)+7] += alpha8*(*v);
1371: idx++; v++;
1372: }
1373: }
1374: PetscLogFlops(16*a->nz);
1375: VecRestoreArray(xx,&x);
1376: VecRestoreArray(zz,&y);
1377: return(0);
1378: }
1380: /* ------------------------------------------------------------------------------*/
1383: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384: {
1385: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1386: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1387: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1389: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1390: PetscInt n,i,jrow,j;
1393: VecGetArray(xx,&x);
1394: VecGetArray(yy,&y);
1395: idx = a->j;
1396: v = a->a;
1397: ii = a->i;
1399: for (i=0; i<m; i++) {
1400: jrow = ii[i];
1401: n = ii[i+1] - jrow;
1402: sum1 = 0.0;
1403: sum2 = 0.0;
1404: sum3 = 0.0;
1405: sum4 = 0.0;
1406: sum5 = 0.0;
1407: sum6 = 0.0;
1408: sum7 = 0.0;
1409: sum8 = 0.0;
1410: sum9 = 0.0;
1411: for (j=0; j<n; j++) {
1412: sum1 += v[jrow]*x[9*idx[jrow]];
1413: sum2 += v[jrow]*x[9*idx[jrow]+1];
1414: sum3 += v[jrow]*x[9*idx[jrow]+2];
1415: sum4 += v[jrow]*x[9*idx[jrow]+3];
1416: sum5 += v[jrow]*x[9*idx[jrow]+4];
1417: sum6 += v[jrow]*x[9*idx[jrow]+5];
1418: sum7 += v[jrow]*x[9*idx[jrow]+6];
1419: sum8 += v[jrow]*x[9*idx[jrow]+7];
1420: sum9 += v[jrow]*x[9*idx[jrow]+8];
1421: jrow++;
1422: }
1423: y[9*i] = sum1;
1424: y[9*i+1] = sum2;
1425: y[9*i+2] = sum3;
1426: y[9*i+3] = sum4;
1427: y[9*i+4] = sum5;
1428: y[9*i+5] = sum6;
1429: y[9*i+6] = sum7;
1430: y[9*i+7] = sum8;
1431: y[9*i+8] = sum9;
1432: }
1434: PetscLogFlops(18*a->nz - 9*m);
1435: VecRestoreArray(xx,&x);
1436: VecRestoreArray(yy,&y);
1437: return(0);
1438: }
1440: /* ------------------------------------------------------------------------------*/
1444: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445: {
1446: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1447: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1448: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1450: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1453: VecSet(yy,zero);
1454: VecGetArray(xx,&x);
1455: VecGetArray(yy,&y);
1457: for (i=0; i<m; i++) {
1458: idx = a->j + a->i[i] ;
1459: v = a->a + a->i[i] ;
1460: n = a->i[i+1] - a->i[i];
1461: alpha1 = x[9*i];
1462: alpha2 = x[9*i+1];
1463: alpha3 = x[9*i+2];
1464: alpha4 = x[9*i+3];
1465: alpha5 = x[9*i+4];
1466: alpha6 = x[9*i+5];
1467: alpha7 = x[9*i+6];
1468: alpha8 = x[9*i+7];
1469: alpha9 = x[9*i+8];
1470: while (n-->0) {
1471: y[9*(*idx)] += alpha1*(*v);
1472: y[9*(*idx)+1] += alpha2*(*v);
1473: y[9*(*idx)+2] += alpha3*(*v);
1474: y[9*(*idx)+3] += alpha4*(*v);
1475: y[9*(*idx)+4] += alpha5*(*v);
1476: y[9*(*idx)+5] += alpha6*(*v);
1477: y[9*(*idx)+6] += alpha7*(*v);
1478: y[9*(*idx)+7] += alpha8*(*v);
1479: y[9*(*idx)+8] += alpha9*(*v);
1480: idx++; v++;
1481: }
1482: }
1483: PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1484: VecRestoreArray(xx,&x);
1485: VecRestoreArray(yy,&y);
1486: return(0);
1487: }
1491: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492: {
1493: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1494: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1495: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1497: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1498: PetscInt n,i,jrow,j;
1501: if (yy != zz) {VecCopy(yy,zz);}
1502: VecGetArray(xx,&x);
1503: VecGetArray(zz,&y);
1504: idx = a->j;
1505: v = a->a;
1506: ii = a->i;
1508: for (i=0; i<m; i++) {
1509: jrow = ii[i];
1510: n = ii[i+1] - jrow;
1511: sum1 = 0.0;
1512: sum2 = 0.0;
1513: sum3 = 0.0;
1514: sum4 = 0.0;
1515: sum5 = 0.0;
1516: sum6 = 0.0;
1517: sum7 = 0.0;
1518: sum8 = 0.0;
1519: sum9 = 0.0;
1520: for (j=0; j<n; j++) {
1521: sum1 += v[jrow]*x[9*idx[jrow]];
1522: sum2 += v[jrow]*x[9*idx[jrow]+1];
1523: sum3 += v[jrow]*x[9*idx[jrow]+2];
1524: sum4 += v[jrow]*x[9*idx[jrow]+3];
1525: sum5 += v[jrow]*x[9*idx[jrow]+4];
1526: sum6 += v[jrow]*x[9*idx[jrow]+5];
1527: sum7 += v[jrow]*x[9*idx[jrow]+6];
1528: sum8 += v[jrow]*x[9*idx[jrow]+7];
1529: sum9 += v[jrow]*x[9*idx[jrow]+8];
1530: jrow++;
1531: }
1532: y[9*i] += sum1;
1533: y[9*i+1] += sum2;
1534: y[9*i+2] += sum3;
1535: y[9*i+3] += sum4;
1536: y[9*i+4] += sum5;
1537: y[9*i+5] += sum6;
1538: y[9*i+6] += sum7;
1539: y[9*i+7] += sum8;
1540: y[9*i+8] += sum9;
1541: }
1543: PetscLogFlops(18*a->nz);
1544: VecRestoreArray(xx,&x);
1545: VecRestoreArray(zz,&y);
1546: return(0);
1547: }
1551: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552: {
1553: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1554: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1555: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1557: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1560: if (yy != zz) {VecCopy(yy,zz);}
1561: VecGetArray(xx,&x);
1562: VecGetArray(zz,&y);
1563: for (i=0; i<m; i++) {
1564: idx = a->j + a->i[i] ;
1565: v = a->a + a->i[i] ;
1566: n = a->i[i+1] - a->i[i];
1567: alpha1 = x[9*i];
1568: alpha2 = x[9*i+1];
1569: alpha3 = x[9*i+2];
1570: alpha4 = x[9*i+3];
1571: alpha5 = x[9*i+4];
1572: alpha6 = x[9*i+5];
1573: alpha7 = x[9*i+6];
1574: alpha8 = x[9*i+7];
1575: alpha9 = x[9*i+8];
1576: while (n-->0) {
1577: y[9*(*idx)] += alpha1*(*v);
1578: y[9*(*idx)+1] += alpha2*(*v);
1579: y[9*(*idx)+2] += alpha3*(*v);
1580: y[9*(*idx)+3] += alpha4*(*v);
1581: y[9*(*idx)+4] += alpha5*(*v);
1582: y[9*(*idx)+5] += alpha6*(*v);
1583: y[9*(*idx)+6] += alpha7*(*v);
1584: y[9*(*idx)+7] += alpha8*(*v);
1585: y[9*(*idx)+8] += alpha9*(*v);
1586: idx++; v++;
1587: }
1588: }
1589: PetscLogFlops(18*a->nz);
1590: VecRestoreArray(xx,&x);
1591: VecRestoreArray(zz,&y);
1592: return(0);
1593: }
1594: /*--------------------------------------------------------------------------------------------*/
1597: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598: {
1599: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1600: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1601: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1603: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1604: PetscInt n,i,jrow,j;
1607: VecGetArray(xx,&x);
1608: VecGetArray(yy,&y);
1609: idx = a->j;
1610: v = a->a;
1611: ii = a->i;
1613: for (i=0; i<m; i++) {
1614: jrow = ii[i];
1615: n = ii[i+1] - jrow;
1616: sum1 = 0.0;
1617: sum2 = 0.0;
1618: sum3 = 0.0;
1619: sum4 = 0.0;
1620: sum5 = 0.0;
1621: sum6 = 0.0;
1622: sum7 = 0.0;
1623: sum8 = 0.0;
1624: sum9 = 0.0;
1625: sum10 = 0.0;
1626: for (j=0; j<n; j++) {
1627: sum1 += v[jrow]*x[10*idx[jrow]];
1628: sum2 += v[jrow]*x[10*idx[jrow]+1];
1629: sum3 += v[jrow]*x[10*idx[jrow]+2];
1630: sum4 += v[jrow]*x[10*idx[jrow]+3];
1631: sum5 += v[jrow]*x[10*idx[jrow]+4];
1632: sum6 += v[jrow]*x[10*idx[jrow]+5];
1633: sum7 += v[jrow]*x[10*idx[jrow]+6];
1634: sum8 += v[jrow]*x[10*idx[jrow]+7];
1635: sum9 += v[jrow]*x[10*idx[jrow]+8];
1636: sum10 += v[jrow]*x[10*idx[jrow]+9];
1637: jrow++;
1638: }
1639: y[10*i] = sum1;
1640: y[10*i+1] = sum2;
1641: y[10*i+2] = sum3;
1642: y[10*i+3] = sum4;
1643: y[10*i+4] = sum5;
1644: y[10*i+5] = sum6;
1645: y[10*i+6] = sum7;
1646: y[10*i+7] = sum8;
1647: y[10*i+8] = sum9;
1648: y[10*i+9] = sum10;
1649: }
1651: PetscLogFlops(20*a->nz - 10*m);
1652: VecRestoreArray(xx,&x);
1653: VecRestoreArray(yy,&y);
1654: return(0);
1655: }
1659: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660: {
1661: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1662: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1663: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1665: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1666: PetscInt n,i,jrow,j;
1669: if (yy != zz) {VecCopy(yy,zz);}
1670: VecGetArray(xx,&x);
1671: VecGetArray(zz,&y);
1672: idx = a->j;
1673: v = a->a;
1674: ii = a->i;
1676: for (i=0; i<m; i++) {
1677: jrow = ii[i];
1678: n = ii[i+1] - jrow;
1679: sum1 = 0.0;
1680: sum2 = 0.0;
1681: sum3 = 0.0;
1682: sum4 = 0.0;
1683: sum5 = 0.0;
1684: sum6 = 0.0;
1685: sum7 = 0.0;
1686: sum8 = 0.0;
1687: sum9 = 0.0;
1688: sum10 = 0.0;
1689: for (j=0; j<n; j++) {
1690: sum1 += v[jrow]*x[10*idx[jrow]];
1691: sum2 += v[jrow]*x[10*idx[jrow]+1];
1692: sum3 += v[jrow]*x[10*idx[jrow]+2];
1693: sum4 += v[jrow]*x[10*idx[jrow]+3];
1694: sum5 += v[jrow]*x[10*idx[jrow]+4];
1695: sum6 += v[jrow]*x[10*idx[jrow]+5];
1696: sum7 += v[jrow]*x[10*idx[jrow]+6];
1697: sum8 += v[jrow]*x[10*idx[jrow]+7];
1698: sum9 += v[jrow]*x[10*idx[jrow]+8];
1699: sum10 += v[jrow]*x[10*idx[jrow]+9];
1700: jrow++;
1701: }
1702: y[10*i] += sum1;
1703: y[10*i+1] += sum2;
1704: y[10*i+2] += sum3;
1705: y[10*i+3] += sum4;
1706: y[10*i+4] += sum5;
1707: y[10*i+5] += sum6;
1708: y[10*i+6] += sum7;
1709: y[10*i+7] += sum8;
1710: y[10*i+8] += sum9;
1711: y[10*i+9] += sum10;
1712: }
1714: PetscLogFlops(20*a->nz - 10*m);
1715: VecRestoreArray(xx,&x);
1716: VecRestoreArray(yy,&y);
1717: return(0);
1718: }
1722: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723: {
1724: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1725: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1726: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1728: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1731: VecSet(yy,zero);
1732: VecGetArray(xx,&x);
1733: VecGetArray(yy,&y);
1735: for (i=0; i<m; i++) {
1736: idx = a->j + a->i[i] ;
1737: v = a->a + a->i[i] ;
1738: n = a->i[i+1] - a->i[i];
1739: alpha1 = x[10*i];
1740: alpha2 = x[10*i+1];
1741: alpha3 = x[10*i+2];
1742: alpha4 = x[10*i+3];
1743: alpha5 = x[10*i+4];
1744: alpha6 = x[10*i+5];
1745: alpha7 = x[10*i+6];
1746: alpha8 = x[10*i+7];
1747: alpha9 = x[10*i+8];
1748: alpha10 = x[10*i+9];
1749: while (n-->0) {
1750: y[10*(*idx)] += alpha1*(*v);
1751: y[10*(*idx)+1] += alpha2*(*v);
1752: y[10*(*idx)+2] += alpha3*(*v);
1753: y[10*(*idx)+3] += alpha4*(*v);
1754: y[10*(*idx)+4] += alpha5*(*v);
1755: y[10*(*idx)+5] += alpha6*(*v);
1756: y[10*(*idx)+6] += alpha7*(*v);
1757: y[10*(*idx)+7] += alpha8*(*v);
1758: y[10*(*idx)+8] += alpha9*(*v);
1759: y[10*(*idx)+9] += alpha10*(*v);
1760: idx++; v++;
1761: }
1762: }
1763: PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1764: VecRestoreArray(xx,&x);
1765: VecRestoreArray(yy,&y);
1766: return(0);
1767: }
1771: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772: {
1773: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1774: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1775: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1777: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1780: if (yy != zz) {VecCopy(yy,zz);}
1781: VecGetArray(xx,&x);
1782: VecGetArray(zz,&y);
1783: for (i=0; i<m; i++) {
1784: idx = a->j + a->i[i] ;
1785: v = a->a + a->i[i] ;
1786: n = a->i[i+1] - a->i[i];
1787: alpha1 = x[10*i];
1788: alpha2 = x[10*i+1];
1789: alpha3 = x[10*i+2];
1790: alpha4 = x[10*i+3];
1791: alpha5 = x[10*i+4];
1792: alpha6 = x[10*i+5];
1793: alpha7 = x[10*i+6];
1794: alpha8 = x[10*i+7];
1795: alpha9 = x[10*i+8];
1796: alpha10 = x[10*i+9];
1797: while (n-->0) {
1798: y[10*(*idx)] += alpha1*(*v);
1799: y[10*(*idx)+1] += alpha2*(*v);
1800: y[10*(*idx)+2] += alpha3*(*v);
1801: y[10*(*idx)+3] += alpha4*(*v);
1802: y[10*(*idx)+4] += alpha5*(*v);
1803: y[10*(*idx)+5] += alpha6*(*v);
1804: y[10*(*idx)+6] += alpha7*(*v);
1805: y[10*(*idx)+7] += alpha8*(*v);
1806: y[10*(*idx)+8] += alpha9*(*v);
1807: y[10*(*idx)+9] += alpha10*(*v);
1808: idx++; v++;
1809: }
1810: }
1811: PetscLogFlops(20*a->nz);
1812: VecRestoreArray(xx,&x);
1813: VecRestoreArray(zz,&y);
1814: return(0);
1815: }
1818: /*--------------------------------------------------------------------------------------------*/
1821: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1822: {
1823: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1824: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1825: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1826: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1828: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1829: PetscInt n,i,jrow,j;
1832: VecGetArray(xx,&x);
1833: VecGetArray(yy,&y);
1834: idx = a->j;
1835: v = a->a;
1836: ii = a->i;
1838: for (i=0; i<m; i++) {
1839: jrow = ii[i];
1840: n = ii[i+1] - jrow;
1841: sum1 = 0.0;
1842: sum2 = 0.0;
1843: sum3 = 0.0;
1844: sum4 = 0.0;
1845: sum5 = 0.0;
1846: sum6 = 0.0;
1847: sum7 = 0.0;
1848: sum8 = 0.0;
1849: sum9 = 0.0;
1850: sum10 = 0.0;
1851: sum11 = 0.0;
1852: sum12 = 0.0;
1853: sum13 = 0.0;
1854: sum14 = 0.0;
1855: sum15 = 0.0;
1856: sum16 = 0.0;
1857: for (j=0; j<n; j++) {
1858: sum1 += v[jrow]*x[16*idx[jrow]];
1859: sum2 += v[jrow]*x[16*idx[jrow]+1];
1860: sum3 += v[jrow]*x[16*idx[jrow]+2];
1861: sum4 += v[jrow]*x[16*idx[jrow]+3];
1862: sum5 += v[jrow]*x[16*idx[jrow]+4];
1863: sum6 += v[jrow]*x[16*idx[jrow]+5];
1864: sum7 += v[jrow]*x[16*idx[jrow]+6];
1865: sum8 += v[jrow]*x[16*idx[jrow]+7];
1866: sum9 += v[jrow]*x[16*idx[jrow]+8];
1867: sum10 += v[jrow]*x[16*idx[jrow]+9];
1868: sum11 += v[jrow]*x[16*idx[jrow]+10];
1869: sum12 += v[jrow]*x[16*idx[jrow]+11];
1870: sum13 += v[jrow]*x[16*idx[jrow]+12];
1871: sum14 += v[jrow]*x[16*idx[jrow]+13];
1872: sum15 += v[jrow]*x[16*idx[jrow]+14];
1873: sum16 += v[jrow]*x[16*idx[jrow]+15];
1874: jrow++;
1875: }
1876: y[16*i] = sum1;
1877: y[16*i+1] = sum2;
1878: y[16*i+2] = sum3;
1879: y[16*i+3] = sum4;
1880: y[16*i+4] = sum5;
1881: y[16*i+5] = sum6;
1882: y[16*i+6] = sum7;
1883: y[16*i+7] = sum8;
1884: y[16*i+8] = sum9;
1885: y[16*i+9] = sum10;
1886: y[16*i+10] = sum11;
1887: y[16*i+11] = sum12;
1888: y[16*i+12] = sum13;
1889: y[16*i+13] = sum14;
1890: y[16*i+14] = sum15;
1891: y[16*i+15] = sum16;
1892: }
1894: PetscLogFlops(32*a->nz - 16*m);
1895: VecRestoreArray(xx,&x);
1896: VecRestoreArray(yy,&y);
1897: return(0);
1898: }
1902: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1903: {
1904: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1905: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1906: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1907: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1909: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1912: VecSet(yy,zero);
1913: VecGetArray(xx,&x);
1914: VecGetArray(yy,&y);
1916: for (i=0; i<m; i++) {
1917: idx = a->j + a->i[i] ;
1918: v = a->a + a->i[i] ;
1919: n = a->i[i+1] - a->i[i];
1920: alpha1 = x[16*i];
1921: alpha2 = x[16*i+1];
1922: alpha3 = x[16*i+2];
1923: alpha4 = x[16*i+3];
1924: alpha5 = x[16*i+4];
1925: alpha6 = x[16*i+5];
1926: alpha7 = x[16*i+6];
1927: alpha8 = x[16*i+7];
1928: alpha9 = x[16*i+8];
1929: alpha10 = x[16*i+9];
1930: alpha11 = x[16*i+10];
1931: alpha12 = x[16*i+11];
1932: alpha13 = x[16*i+12];
1933: alpha14 = x[16*i+13];
1934: alpha15 = x[16*i+14];
1935: alpha16 = x[16*i+15];
1936: while (n-->0) {
1937: y[16*(*idx)] += alpha1*(*v);
1938: y[16*(*idx)+1] += alpha2*(*v);
1939: y[16*(*idx)+2] += alpha3*(*v);
1940: y[16*(*idx)+3] += alpha4*(*v);
1941: y[16*(*idx)+4] += alpha5*(*v);
1942: y[16*(*idx)+5] += alpha6*(*v);
1943: y[16*(*idx)+6] += alpha7*(*v);
1944: y[16*(*idx)+7] += alpha8*(*v);
1945: y[16*(*idx)+8] += alpha9*(*v);
1946: y[16*(*idx)+9] += alpha10*(*v);
1947: y[16*(*idx)+10] += alpha11*(*v);
1948: y[16*(*idx)+11] += alpha12*(*v);
1949: y[16*(*idx)+12] += alpha13*(*v);
1950: y[16*(*idx)+13] += alpha14*(*v);
1951: y[16*(*idx)+14] += alpha15*(*v);
1952: y[16*(*idx)+15] += alpha16*(*v);
1953: idx++; v++;
1954: }
1955: }
1956: PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1957: VecRestoreArray(xx,&x);
1958: VecRestoreArray(yy,&y);
1959: return(0);
1960: }
1964: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1965: {
1966: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1967: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1968: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1969: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1971: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1972: PetscInt n,i,jrow,j;
1975: if (yy != zz) {VecCopy(yy,zz);}
1976: VecGetArray(xx,&x);
1977: VecGetArray(zz,&y);
1978: idx = a->j;
1979: v = a->a;
1980: ii = a->i;
1982: for (i=0; i<m; i++) {
1983: jrow = ii[i];
1984: n = ii[i+1] - jrow;
1985: sum1 = 0.0;
1986: sum2 = 0.0;
1987: sum3 = 0.0;
1988: sum4 = 0.0;
1989: sum5 = 0.0;
1990: sum6 = 0.0;
1991: sum7 = 0.0;
1992: sum8 = 0.0;
1993: sum9 = 0.0;
1994: sum10 = 0.0;
1995: sum11 = 0.0;
1996: sum12 = 0.0;
1997: sum13 = 0.0;
1998: sum14 = 0.0;
1999: sum15 = 0.0;
2000: sum16 = 0.0;
2001: for (j=0; j<n; j++) {
2002: sum1 += v[jrow]*x[16*idx[jrow]];
2003: sum2 += v[jrow]*x[16*idx[jrow]+1];
2004: sum3 += v[jrow]*x[16*idx[jrow]+2];
2005: sum4 += v[jrow]*x[16*idx[jrow]+3];
2006: sum5 += v[jrow]*x[16*idx[jrow]+4];
2007: sum6 += v[jrow]*x[16*idx[jrow]+5];
2008: sum7 += v[jrow]*x[16*idx[jrow]+6];
2009: sum8 += v[jrow]*x[16*idx[jrow]+7];
2010: sum9 += v[jrow]*x[16*idx[jrow]+8];
2011: sum10 += v[jrow]*x[16*idx[jrow]+9];
2012: sum11 += v[jrow]*x[16*idx[jrow]+10];
2013: sum12 += v[jrow]*x[16*idx[jrow]+11];
2014: sum13 += v[jrow]*x[16*idx[jrow]+12];
2015: sum14 += v[jrow]*x[16*idx[jrow]+13];
2016: sum15 += v[jrow]*x[16*idx[jrow]+14];
2017: sum16 += v[jrow]*x[16*idx[jrow]+15];
2018: jrow++;
2019: }
2020: y[16*i] += sum1;
2021: y[16*i+1] += sum2;
2022: y[16*i+2] += sum3;
2023: y[16*i+3] += sum4;
2024: y[16*i+4] += sum5;
2025: y[16*i+5] += sum6;
2026: y[16*i+6] += sum7;
2027: y[16*i+7] += sum8;
2028: y[16*i+8] += sum9;
2029: y[16*i+9] += sum10;
2030: y[16*i+10] += sum11;
2031: y[16*i+11] += sum12;
2032: y[16*i+12] += sum13;
2033: y[16*i+13] += sum14;
2034: y[16*i+14] += sum15;
2035: y[16*i+15] += sum16;
2036: }
2038: PetscLogFlops(32*a->nz);
2039: VecRestoreArray(xx,&x);
2040: VecRestoreArray(zz,&y);
2041: return(0);
2042: }
2046: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2047: {
2048: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2049: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2050: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2051: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2053: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2056: if (yy != zz) {VecCopy(yy,zz);}
2057: VecGetArray(xx,&x);
2058: VecGetArray(zz,&y);
2059: for (i=0; i<m; i++) {
2060: idx = a->j + a->i[i] ;
2061: v = a->a + a->i[i] ;
2062: n = a->i[i+1] - a->i[i];
2063: alpha1 = x[16*i];
2064: alpha2 = x[16*i+1];
2065: alpha3 = x[16*i+2];
2066: alpha4 = x[16*i+3];
2067: alpha5 = x[16*i+4];
2068: alpha6 = x[16*i+5];
2069: alpha7 = x[16*i+6];
2070: alpha8 = x[16*i+7];
2071: alpha9 = x[16*i+8];
2072: alpha10 = x[16*i+9];
2073: alpha11 = x[16*i+10];
2074: alpha12 = x[16*i+11];
2075: alpha13 = x[16*i+12];
2076: alpha14 = x[16*i+13];
2077: alpha15 = x[16*i+14];
2078: alpha16 = x[16*i+15];
2079: while (n-->0) {
2080: y[16*(*idx)] += alpha1*(*v);
2081: y[16*(*idx)+1] += alpha2*(*v);
2082: y[16*(*idx)+2] += alpha3*(*v);
2083: y[16*(*idx)+3] += alpha4*(*v);
2084: y[16*(*idx)+4] += alpha5*(*v);
2085: y[16*(*idx)+5] += alpha6*(*v);
2086: y[16*(*idx)+6] += alpha7*(*v);
2087: y[16*(*idx)+7] += alpha8*(*v);
2088: y[16*(*idx)+8] += alpha9*(*v);
2089: y[16*(*idx)+9] += alpha10*(*v);
2090: y[16*(*idx)+10] += alpha11*(*v);
2091: y[16*(*idx)+11] += alpha12*(*v);
2092: y[16*(*idx)+12] += alpha13*(*v);
2093: y[16*(*idx)+13] += alpha14*(*v);
2094: y[16*(*idx)+14] += alpha15*(*v);
2095: y[16*(*idx)+15] += alpha16*(*v);
2096: idx++; v++;
2097: }
2098: }
2099: PetscLogFlops(32*a->nz);
2100: VecRestoreArray(xx,&x);
2101: VecRestoreArray(zz,&y);
2102: return(0);
2103: }
2105: /*===================================================================================*/
2108: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2109: {
2110: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2114: /* start the scatter */
2115: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2116: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2117: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2118: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2119: return(0);
2120: }
2124: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2125: {
2126: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2130: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2131: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2132: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2133: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2134: return(0);
2135: }
2139: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2140: {
2141: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2145: /* start the scatter */
2146: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2147: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2148: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2149: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2150: return(0);
2151: }
2155: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2156: {
2157: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2161: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2162: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2163: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2164: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2165: return(0);
2166: }
2168: /* ----------------------------------------------------------------*/
2171: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2172: {
2173: /* This routine requires testing -- but it's getting better. */
2174: PetscErrorCode ierr;
2175: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2176: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2177: Mat P=pp->AIJ;
2178: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2179: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2180: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2181: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2182: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2183: MatScalar *ca;
2186: /* Start timer */
2189: /* Get ij structure of P^T */
2190: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2192: cn = pn*ppdof;
2193: /* Allocate ci array, arrays for fill computation and */
2194: /* free space for accumulating nonzero column info */
2195: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2196: ci[0] = 0;
2198: /* Work arrays for rows of P^T*A */
2199: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2200: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2201: ptasparserow = ptadenserow + an;
2202: denserow = ptasparserow + an;
2203: sparserow = denserow + cn;
2205: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2206: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2207: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2208: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2209: current_space = free_space;
2211: /* Determine symbolic info for each row of C: */
2212: for (i=0;i<pn;i++) {
2213: ptnzi = pti[i+1] - pti[i];
2214: ptJ = ptj + pti[i];
2215: for (dof=0;dof<ppdof;dof++) {
2216: ptanzi = 0;
2217: /* Determine symbolic row of PtA: */
2218: for (j=0;j<ptnzi;j++) {
2219: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2220: arow = ptJ[j]*ppdof + dof;
2221: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2222: anzj = ai[arow+1] - ai[arow];
2223: ajj = aj + ai[arow];
2224: for (k=0;k<anzj;k++) {
2225: if (!ptadenserow[ajj[k]]) {
2226: ptadenserow[ajj[k]] = -1;
2227: ptasparserow[ptanzi++] = ajj[k];
2228: }
2229: }
2230: }
2231: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2232: ptaj = ptasparserow;
2233: cnzi = 0;
2234: for (j=0;j<ptanzi;j++) {
2235: /* Get offset within block of P */
2236: pshift = *ptaj%ppdof;
2237: /* Get block row of P */
2238: prow = (*ptaj++)/ppdof; /* integer division */
2239: /* P has same number of nonzeros per row as the compressed form */
2240: pnzj = pi[prow+1] - pi[prow];
2241: pjj = pj + pi[prow];
2242: for (k=0;k<pnzj;k++) {
2243: /* Locations in C are shifted by the offset within the block */
2244: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2245: if (!denserow[pjj[k]*ppdof+pshift]) {
2246: denserow[pjj[k]*ppdof+pshift] = -1;
2247: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2248: }
2249: }
2250: }
2252: /* sort sparserow */
2253: PetscSortInt(cnzi,sparserow);
2254:
2255: /* If free space is not available, make more free space */
2256: /* Double the amount of total space in the list */
2257: if (current_space->local_remaining<cnzi) {
2258: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
2259: }
2261: /* Copy data into free space, and zero out denserows */
2262: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2263: current_space->array += cnzi;
2264: current_space->local_used += cnzi;
2265: current_space->local_remaining -= cnzi;
2267: for (j=0;j<ptanzi;j++) {
2268: ptadenserow[ptasparserow[j]] = 0;
2269: }
2270: for (j=0;j<cnzi;j++) {
2271: denserow[sparserow[j]] = 0;
2272: }
2273: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2274: /* For now, we will recompute what is needed. */
2275: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2276: }
2277: }
2278: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2279: /* Allocate space for cj, initialize cj, and */
2280: /* destroy list of free space and other temporary array(s) */
2281: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2282: PetscFreeSpaceContiguous(&free_space,cj);
2283: PetscFree(ptadenserow);
2284:
2285: /* Allocate space for ca */
2286: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2287: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2288:
2289: /* put together the new matrix */
2290: MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);
2292: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2293: /* Since these are PETSc arrays, change flags to free them as necessary. */
2294: c = (Mat_SeqAIJ *)((*C)->data);
2295: c->free_a = PETSC_TRUE;
2296: c->free_ij = PETSC_TRUE;
2297: c->nonew = 0;
2299: /* Clean up. */
2300: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2303: return(0);
2304: }
2308: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2309: {
2310: /* This routine requires testing -- first draft only */
2312: PetscInt flops=0;
2313: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2314: Mat P=pp->AIJ;
2315: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2316: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2317: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2318: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2319: PetscInt *ci=c->i,*cj=c->j,*cjj;
2320: PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2321: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2322: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2325: /* Allocate temporary array for storage of one row of A*P */
2326: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2327: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2329: apj = (PetscInt *)(apa + cn);
2330: apjdense = apj + cn;
2332: /* Clear old values in C */
2333: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2335: for (i=0;i<am;i++) {
2336: /* Form sparse row of A*P */
2337: anzi = ai[i+1] - ai[i];
2338: apnzj = 0;
2339: for (j=0;j<anzi;j++) {
2340: /* Get offset within block of P */
2341: pshift = *aj%ppdof;
2342: /* Get block row of P */
2343: prow = *aj++/ppdof; /* integer division */
2344: pnzj = pi[prow+1] - pi[prow];
2345: pjj = pj + pi[prow];
2346: paj = pa + pi[prow];
2347: for (k=0;k<pnzj;k++) {
2348: poffset = pjj[k]*ppdof+pshift;
2349: if (!apjdense[poffset]) {
2350: apjdense[poffset] = -1;
2351: apj[apnzj++] = poffset;
2352: }
2353: apa[poffset] += (*aa)*paj[k];
2354: }
2355: flops += 2*pnzj;
2356: aa++;
2357: }
2359: /* Sort the j index array for quick sparse axpy. */
2360: /* Note: a array does not need sorting as it is in dense storage locations. */
2361: PetscSortInt(apnzj,apj);
2363: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2364: prow = i/ppdof; /* integer division */
2365: pshift = i%ppdof;
2366: poffset = pi[prow];
2367: pnzi = pi[prow+1] - poffset;
2368: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2369: pJ = pj+poffset;
2370: pA = pa+poffset;
2371: for (j=0;j<pnzi;j++) {
2372: crow = (*pJ)*ppdof+pshift;
2373: cjj = cj + ci[crow];
2374: caj = ca + ci[crow];
2375: pJ++;
2376: /* Perform sparse axpy operation. Note cjj includes apj. */
2377: for (k=0,nextap=0;nextap<apnzj;k++) {
2378: if (cjj[k]==apj[nextap]) {
2379: caj[k] += (*pA)*apa[apj[nextap++]];
2380: }
2381: }
2382: flops += 2*apnzj;
2383: pA++;
2384: }
2386: /* Zero the current row info for A*P */
2387: for (j=0;j<apnzj;j++) {
2388: apa[apj[j]] = 0.;
2389: apjdense[apj[j]] = 0;
2390: }
2391: }
2393: /* Assemble the final matrix and clean up */
2394: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2395: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2396: PetscFree(apa);
2397: PetscLogFlops(flops);
2398: return(0);
2399: }
2403: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2404: {
2405: PetscErrorCode ierr;
2408: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2409: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2410: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2411: return(0);
2412: }
2416: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2417: {
2419: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2420: return(0);
2421: }
2426: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2427: {
2428: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2429: Mat a = b->AIJ,B;
2430: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2431: PetscErrorCode ierr;
2432: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2433: PetscInt *cols;
2434: PetscScalar *vals;
2437: MatGetSize(a,&m,&n);
2438: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2439: for (i=0; i<m; i++) {
2440: nmax = PetscMax(nmax,aij->ilen[i]);
2441: for (j=0; j<dof; j++) {
2442: ilen[dof*i+j] = aij->ilen[i];
2443: }
2444: }
2445: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2446: MatSetOption(B,MAT_COLUMNS_SORTED);
2447: PetscFree(ilen);
2448: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2449: ii = 0;
2450: for (i=0; i<m; i++) {
2451: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2452: for (j=0; j<dof; j++) {
2453: for (k=0; k<ncols; k++) {
2454: icols[k] = dof*cols[k]+j;
2455: }
2456: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2457: ii++;
2458: }
2459: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2460: }
2461: PetscFree(icols);
2462: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2463: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2465: if (reuse == MAT_REUSE_MATRIX) {
2466: MatHeaderReplace(A,B);
2467: } else {
2468: *newmat = B;
2469: }
2470: return(0);
2471: }
2474: #include src/mat/impls/aij/mpi/mpiaij.h
2479: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2480: {
2481: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2482: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2483: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2484: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2485: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2486: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2487: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2488: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2489: PetscInt rstart,cstart,*garray,ii,k;
2490: PetscErrorCode ierr;
2491: PetscScalar *vals,*ovals;
2494: PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2495: for (i=0; i<A->rmap.n/dof; i++) {
2496: nmax = PetscMax(nmax,AIJ->ilen[i]);
2497: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2498: for (j=0; j<dof; j++) {
2499: dnz[dof*i+j] = AIJ->ilen[i];
2500: onz[dof*i+j] = OAIJ->ilen[i];
2501: }
2502: }
2503: MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2504: MatSetOption(B,MAT_COLUMNS_SORTED);
2505: PetscFree2(dnz,onz);
2507: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2508: rstart = dof*maij->A->rmap.rstart;
2509: cstart = dof*maij->A->cmap.rstart;
2510: garray = mpiaij->garray;
2512: ii = rstart;
2513: for (i=0; i<A->rmap.n/dof; i++) {
2514: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2515: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2516: for (j=0; j<dof; j++) {
2517: for (k=0; k<ncols; k++) {
2518: icols[k] = cstart + dof*cols[k]+j;
2519: }
2520: for (k=0; k<oncols; k++) {
2521: oicols[k] = dof*garray[ocols[k]]+j;
2522: }
2523: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2524: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2525: ii++;
2526: }
2527: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2528: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2529: }
2530: PetscFree2(icols,oicols);
2532: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2533: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2535: if (reuse == MAT_REUSE_MATRIX) {
2536: PetscInt refct = A->refct; /* save A->refct */
2537: A->refct = 1;
2538: MatHeaderReplace(A,B);
2539: A->refct = refct; /* restore A->refct */
2540: } else {
2541: *newmat = B;
2542: }
2543: return(0);
2544: }
2548: /* ---------------------------------------------------------------------------------- */
2549: /*MC
2550: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2551: operations for multicomponent problems. It interpolates each component the same
2552: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2553: and MATMPIAIJ for distributed matrices.
2555: Operations provided:
2556: + MatMult
2557: . MatMultTranspose
2558: . MatMultAdd
2559: . MatMultTransposeAdd
2560: - MatView
2562: Level: advanced
2564: M*/
2567: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2568: {
2570: PetscMPIInt size;
2571: PetscInt n;
2572: Mat_MPIMAIJ *b;
2573: Mat B;
2576: PetscObjectReference((PetscObject)A);
2578: if (dof == 1) {
2579: *maij = A;
2580: } else {
2581: MatCreate(A->comm,&B);
2582: MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2583: B->assembled = PETSC_TRUE;
2585: MPI_Comm_size(A->comm,&size);
2586: if (size == 1) {
2587: MatSetType(B,MATSEQMAIJ);
2588: B->ops->destroy = MatDestroy_SeqMAIJ;
2589: B->ops->view = MatView_SeqMAIJ;
2590: b = (Mat_MPIMAIJ*)B->data;
2591: b->dof = dof;
2592: b->AIJ = A;
2593: if (dof == 2) {
2594: B->ops->mult = MatMult_SeqMAIJ_2;
2595: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2596: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2597: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2598: } else if (dof == 3) {
2599: B->ops->mult = MatMult_SeqMAIJ_3;
2600: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2601: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2602: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2603: } else if (dof == 4) {
2604: B->ops->mult = MatMult_SeqMAIJ_4;
2605: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2606: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2607: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2608: } else if (dof == 5) {
2609: B->ops->mult = MatMult_SeqMAIJ_5;
2610: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2611: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2612: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2613: } else if (dof == 6) {
2614: B->ops->mult = MatMult_SeqMAIJ_6;
2615: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2616: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2617: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2618: } else if (dof == 7) {
2619: B->ops->mult = MatMult_SeqMAIJ_7;
2620: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2621: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2622: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2623: } else if (dof == 8) {
2624: B->ops->mult = MatMult_SeqMAIJ_8;
2625: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2626: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2627: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2628: } else if (dof == 9) {
2629: B->ops->mult = MatMult_SeqMAIJ_9;
2630: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2631: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2632: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2633: } else if (dof == 10) {
2634: B->ops->mult = MatMult_SeqMAIJ_10;
2635: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2636: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2637: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2638: } else if (dof == 16) {
2639: B->ops->mult = MatMult_SeqMAIJ_16;
2640: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2641: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2642: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2643: } else {
2644: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2645: }
2646: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2647: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2648: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2649: } else {
2650: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2651: IS from,to;
2652: Vec gvec;
2653: PetscInt *garray,i;
2655: MatSetType(B,MATMPIMAIJ);
2656: B->ops->destroy = MatDestroy_MPIMAIJ;
2657: B->ops->view = MatView_MPIMAIJ;
2658: b = (Mat_MPIMAIJ*)B->data;
2659: b->dof = dof;
2660: b->A = A;
2661: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2662: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2664: VecGetSize(mpiaij->lvec,&n);
2665: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2666: VecSetBlockSize(b->w,dof);
2668: /* create two temporary Index sets for build scatter gather */
2669: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2670: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2671: ISCreateBlock(A->comm,dof,n,garray,&from);
2672: PetscFree(garray);
2673: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2675: /* create temporary global vector to generate scatter context */
2676: VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2677: VecSetBlockSize(gvec,dof);
2679: /* generate the scatter context */
2680: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
2682: ISDestroy(from);
2683: ISDestroy(to);
2684: VecDestroy(gvec);
2686: B->ops->mult = MatMult_MPIMAIJ_dof;
2687: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
2688: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
2689: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2690: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
2691: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
2692: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2693: }
2694: *maij = B;
2695: MatView_Private(B);
2696: }
2697: return(0);
2698: }