Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateSeqDense
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNew(Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->factor           = 0;
162:   A->mapping          = 0;

164:   b->AIJ  = 0;
165:   b->dof  = 0;
166:   b->OAIJ = 0;
167:   b->ctx  = 0;
168:   b->w    = 0;
169:   MPI_Comm_size(A->comm,&size);
170:   if (size == 1){
171:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172:   } else {
173:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174:   }
175:   return(0);
176: }

179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar    *x,*y,*v,sum1, sum2;
188:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
189:   PetscInt       n,i,jrow,j;

192:   VecGetArray(xx,&x);
193:   VecGetArray(yy,&y);
194:   idx  = a->j;
195:   v    = a->a;
196:   ii   = a->i;

198:   for (i=0; i<m; i++) {
199:     jrow = ii[i];
200:     n    = ii[i+1] - jrow;
201:     sum1  = 0.0;
202:     sum2  = 0.0;
203:     for (j=0; j<n; j++) {
204:       sum1 += v[jrow]*x[2*idx[jrow]];
205:       sum2 += v[jrow]*x[2*idx[jrow]+1];
206:       jrow++;
207:      }
208:     y[2*i]   = sum1;
209:     y[2*i+1] = sum2;
210:   }

212:   PetscLogFlops(4*a->nz - 2*m);
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArray(yy,&y);
215:   return(0);
216: }

220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

229:   VecSet(yy,zero);
230:   VecGetArray(xx,&x);
231:   VecGetArray(yy,&y);
232: 
233:   for (i=0; i<m; i++) {
234:     idx    = a->j + a->i[i] ;
235:     v      = a->a + a->i[i] ;
236:     n      = a->i[i+1] - a->i[i];
237:     alpha1 = x[2*i];
238:     alpha2 = x[2*i+1];
239:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240:   }
241:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
242:   VecRestoreArray(xx,&x);
243:   VecRestoreArray(yy,&y);
244:   return(0);
245: }

249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253:   PetscScalar    *x,*y,*v,sum1, sum2;
255:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
256:   PetscInt       n,i,jrow,j;

259:   if (yy != zz) {VecCopy(yy,zz);}
260:   VecGetArray(xx,&x);
261:   VecGetArray(zz,&y);
262:   idx  = a->j;
263:   v    = a->a;
264:   ii   = a->i;

266:   for (i=0; i<m; i++) {
267:     jrow = ii[i];
268:     n    = ii[i+1] - jrow;
269:     sum1  = 0.0;
270:     sum2  = 0.0;
271:     for (j=0; j<n; j++) {
272:       sum1 += v[jrow]*x[2*idx[jrow]];
273:       sum2 += v[jrow]*x[2*idx[jrow]+1];
274:       jrow++;
275:      }
276:     y[2*i]   += sum1;
277:     y[2*i+1] += sum2;
278:   }

280:   PetscLogFlops(4*a->nz - 2*m);
281:   VecRestoreArray(xx,&x);
282:   VecRestoreArray(zz,&y);
283:   return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291:   PetscScalar    *x,*y,*v,alpha1,alpha2;
293:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

296:   if (yy != zz) {VecCopy(yy,zz);}
297:   VecGetArray(xx,&x);
298:   VecGetArray(zz,&y);
299: 
300:   for (i=0; i<m; i++) {
301:     idx   = a->j + a->i[i] ;
302:     v     = a->a + a->i[i] ;
303:     n     = a->i[i+1] - a->i[i];
304:     alpha1 = x[2*i];
305:     alpha2 = x[2*i+1];
306:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307:   }
308:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
309:   VecRestoreArray(xx,&x);
310:   VecRestoreArray(zz,&y);
311:   return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
323:   PetscInt       n,i,jrow,j;

326:   VecGetArray(xx,&x);
327:   VecGetArray(yy,&y);
328:   idx  = a->j;
329:   v    = a->a;
330:   ii   = a->i;

332:   for (i=0; i<m; i++) {
333:     jrow = ii[i];
334:     n    = ii[i+1] - jrow;
335:     sum1  = 0.0;
336:     sum2  = 0.0;
337:     sum3  = 0.0;
338:     for (j=0; j<n; j++) {
339:       sum1 += v[jrow]*x[3*idx[jrow]];
340:       sum2 += v[jrow]*x[3*idx[jrow]+1];
341:       sum3 += v[jrow]*x[3*idx[jrow]+2];
342:       jrow++;
343:      }
344:     y[3*i]   = sum1;
345:     y[3*i+1] = sum2;
346:     y[3*i+2] = sum3;
347:   }

349:   PetscLogFlops(6*a->nz - 3*m);
350:   VecRestoreArray(xx,&x);
351:   VecRestoreArray(yy,&y);
352:   return(0);
353: }

357: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358: {
359:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
360:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
361:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
363:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

366:   VecSet(yy,zero);
367:   VecGetArray(xx,&x);
368:   VecGetArray(yy,&y);
369: 
370:   for (i=0; i<m; i++) {
371:     idx    = a->j + a->i[i];
372:     v      = a->a + a->i[i];
373:     n      = a->i[i+1] - a->i[i];
374:     alpha1 = x[3*i];
375:     alpha2 = x[3*i+1];
376:     alpha3 = x[3*i+2];
377:     while (n-->0) {
378:       y[3*(*idx)]   += alpha1*(*v);
379:       y[3*(*idx)+1] += alpha2*(*v);
380:       y[3*(*idx)+2] += alpha3*(*v);
381:       idx++; v++;
382:     }
383:   }
384:   PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
385:   VecRestoreArray(xx,&x);
386:   VecRestoreArray(yy,&y);
387:   return(0);
388: }

392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
395:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
396:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
398:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
399:   PetscInt       n,i,jrow,j;

402:   if (yy != zz) {VecCopy(yy,zz);}
403:   VecGetArray(xx,&x);
404:   VecGetArray(zz,&y);
405:   idx  = a->j;
406:   v    = a->a;
407:   ii   = a->i;

409:   for (i=0; i<m; i++) {
410:     jrow = ii[i];
411:     n    = ii[i+1] - jrow;
412:     sum1  = 0.0;
413:     sum2  = 0.0;
414:     sum3  = 0.0;
415:     for (j=0; j<n; j++) {
416:       sum1 += v[jrow]*x[3*idx[jrow]];
417:       sum2 += v[jrow]*x[3*idx[jrow]+1];
418:       sum3 += v[jrow]*x[3*idx[jrow]+2];
419:       jrow++;
420:      }
421:     y[3*i]   += sum1;
422:     y[3*i+1] += sum2;
423:     y[3*i+2] += sum3;
424:   }

426:   PetscLogFlops(6*a->nz);
427:   VecRestoreArray(xx,&x);
428:   VecRestoreArray(zz,&y);
429:   return(0);
430: }
433: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434: {
435:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
436:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
437:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
439:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

442:   if (yy != zz) {VecCopy(yy,zz);}
443:   VecGetArray(xx,&x);
444:   VecGetArray(zz,&y);
445:   for (i=0; i<m; i++) {
446:     idx    = a->j + a->i[i] ;
447:     v      = a->a + a->i[i] ;
448:     n      = a->i[i+1] - a->i[i];
449:     alpha1 = x[3*i];
450:     alpha2 = x[3*i+1];
451:     alpha3 = x[3*i+2];
452:     while (n-->0) {
453:       y[3*(*idx)]   += alpha1*(*v);
454:       y[3*(*idx)+1] += alpha2*(*v);
455:       y[3*(*idx)+2] += alpha3*(*v);
456:       idx++; v++;
457:     }
458:   }
459:   PetscLogFlops(6*a->nz);
460:   VecRestoreArray(xx,&x);
461:   VecRestoreArray(zz,&y);
462:   return(0);
463: }

465: /* ------------------------------------------------------------------------------*/
468: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469: {
470:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
471:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
472:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
474:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
475:   PetscInt       n,i,jrow,j;

478:   VecGetArray(xx,&x);
479:   VecGetArray(yy,&y);
480:   idx  = a->j;
481:   v    = a->a;
482:   ii   = a->i;

484:   for (i=0; i<m; i++) {
485:     jrow = ii[i];
486:     n    = ii[i+1] - jrow;
487:     sum1  = 0.0;
488:     sum2  = 0.0;
489:     sum3  = 0.0;
490:     sum4  = 0.0;
491:     for (j=0; j<n; j++) {
492:       sum1 += v[jrow]*x[4*idx[jrow]];
493:       sum2 += v[jrow]*x[4*idx[jrow]+1];
494:       sum3 += v[jrow]*x[4*idx[jrow]+2];
495:       sum4 += v[jrow]*x[4*idx[jrow]+3];
496:       jrow++;
497:      }
498:     y[4*i]   = sum1;
499:     y[4*i+1] = sum2;
500:     y[4*i+2] = sum3;
501:     y[4*i+3] = sum4;
502:   }

504:   PetscLogFlops(8*a->nz - 4*m);
505:   VecRestoreArray(xx,&x);
506:   VecRestoreArray(yy,&y);
507:   return(0);
508: }

512: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513: {
514:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
515:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
516:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
518:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

521:   VecSet(yy,zero);
522:   VecGetArray(xx,&x);
523:   VecGetArray(yy,&y);
524:   for (i=0; i<m; i++) {
525:     idx    = a->j + a->i[i] ;
526:     v      = a->a + a->i[i] ;
527:     n      = a->i[i+1] - a->i[i];
528:     alpha1 = x[4*i];
529:     alpha2 = x[4*i+1];
530:     alpha3 = x[4*i+2];
531:     alpha4 = x[4*i+3];
532:     while (n-->0) {
533:       y[4*(*idx)]   += alpha1*(*v);
534:       y[4*(*idx)+1] += alpha2*(*v);
535:       y[4*(*idx)+2] += alpha3*(*v);
536:       y[4*(*idx)+3] += alpha4*(*v);
537:       idx++; v++;
538:     }
539:   }
540:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
541:   VecRestoreArray(xx,&x);
542:   VecRestoreArray(yy,&y);
543:   return(0);
544: }

548: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549: {
550:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
551:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
552:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
554:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
555:   PetscInt       n,i,jrow,j;

558:   if (yy != zz) {VecCopy(yy,zz);}
559:   VecGetArray(xx,&x);
560:   VecGetArray(zz,&y);
561:   idx  = a->j;
562:   v    = a->a;
563:   ii   = a->i;

565:   for (i=0; i<m; i++) {
566:     jrow = ii[i];
567:     n    = ii[i+1] - jrow;
568:     sum1  = 0.0;
569:     sum2  = 0.0;
570:     sum3  = 0.0;
571:     sum4  = 0.0;
572:     for (j=0; j<n; j++) {
573:       sum1 += v[jrow]*x[4*idx[jrow]];
574:       sum2 += v[jrow]*x[4*idx[jrow]+1];
575:       sum3 += v[jrow]*x[4*idx[jrow]+2];
576:       sum4 += v[jrow]*x[4*idx[jrow]+3];
577:       jrow++;
578:      }
579:     y[4*i]   += sum1;
580:     y[4*i+1] += sum2;
581:     y[4*i+2] += sum3;
582:     y[4*i+3] += sum4;
583:   }

585:   PetscLogFlops(8*a->nz - 4*m);
586:   VecRestoreArray(xx,&x);
587:   VecRestoreArray(zz,&y);
588:   return(0);
589: }
592: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593: {
594:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
598:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

601:   if (yy != zz) {VecCopy(yy,zz);}
602:   VecGetArray(xx,&x);
603:   VecGetArray(zz,&y);
604: 
605:   for (i=0; i<m; i++) {
606:     idx    = a->j + a->i[i] ;
607:     v      = a->a + a->i[i] ;
608:     n      = a->i[i+1] - a->i[i];
609:     alpha1 = x[4*i];
610:     alpha2 = x[4*i+1];
611:     alpha3 = x[4*i+2];
612:     alpha4 = x[4*i+3];
613:     while (n-->0) {
614:       y[4*(*idx)]   += alpha1*(*v);
615:       y[4*(*idx)+1] += alpha2*(*v);
616:       y[4*(*idx)+2] += alpha3*(*v);
617:       y[4*(*idx)+3] += alpha4*(*v);
618:       idx++; v++;
619:     }
620:   }
621:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
622:   VecRestoreArray(xx,&x);
623:   VecRestoreArray(zz,&y);
624:   return(0);
625: }
626: /* ------------------------------------------------------------------------------*/

630: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631: {
632:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
633:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
634:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
636:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
637:   PetscInt       n,i,jrow,j;

640:   VecGetArray(xx,&x);
641:   VecGetArray(yy,&y);
642:   idx  = a->j;
643:   v    = a->a;
644:   ii   = a->i;

646:   for (i=0; i<m; i++) {
647:     jrow = ii[i];
648:     n    = ii[i+1] - jrow;
649:     sum1  = 0.0;
650:     sum2  = 0.0;
651:     sum3  = 0.0;
652:     sum4  = 0.0;
653:     sum5  = 0.0;
654:     for (j=0; j<n; j++) {
655:       sum1 += v[jrow]*x[5*idx[jrow]];
656:       sum2 += v[jrow]*x[5*idx[jrow]+1];
657:       sum3 += v[jrow]*x[5*idx[jrow]+2];
658:       sum4 += v[jrow]*x[5*idx[jrow]+3];
659:       sum5 += v[jrow]*x[5*idx[jrow]+4];
660:       jrow++;
661:      }
662:     y[5*i]   = sum1;
663:     y[5*i+1] = sum2;
664:     y[5*i+2] = sum3;
665:     y[5*i+3] = sum4;
666:     y[5*i+4] = sum5;
667:   }

669:   PetscLogFlops(10*a->nz - 5*m);
670:   VecRestoreArray(xx,&x);
671:   VecRestoreArray(yy,&y);
672:   return(0);
673: }

677: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678: {
679:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
680:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
681:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
683:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

686:   VecSet(yy,zero);
687:   VecGetArray(xx,&x);
688:   VecGetArray(yy,&y);
689: 
690:   for (i=0; i<m; i++) {
691:     idx    = a->j + a->i[i] ;
692:     v      = a->a + a->i[i] ;
693:     n      = a->i[i+1] - a->i[i];
694:     alpha1 = x[5*i];
695:     alpha2 = x[5*i+1];
696:     alpha3 = x[5*i+2];
697:     alpha4 = x[5*i+3];
698:     alpha5 = x[5*i+4];
699:     while (n-->0) {
700:       y[5*(*idx)]   += alpha1*(*v);
701:       y[5*(*idx)+1] += alpha2*(*v);
702:       y[5*(*idx)+2] += alpha3*(*v);
703:       y[5*(*idx)+3] += alpha4*(*v);
704:       y[5*(*idx)+4] += alpha5*(*v);
705:       idx++; v++;
706:     }
707:   }
708:   PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
709:   VecRestoreArray(xx,&x);
710:   VecRestoreArray(yy,&y);
711:   return(0);
712: }

716: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717: {
718:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
719:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
720:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
722:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
723:   PetscInt       n,i,jrow,j;

726:   if (yy != zz) {VecCopy(yy,zz);}
727:   VecGetArray(xx,&x);
728:   VecGetArray(zz,&y);
729:   idx  = a->j;
730:   v    = a->a;
731:   ii   = a->i;

733:   for (i=0; i<m; i++) {
734:     jrow = ii[i];
735:     n    = ii[i+1] - jrow;
736:     sum1  = 0.0;
737:     sum2  = 0.0;
738:     sum3  = 0.0;
739:     sum4  = 0.0;
740:     sum5  = 0.0;
741:     for (j=0; j<n; j++) {
742:       sum1 += v[jrow]*x[5*idx[jrow]];
743:       sum2 += v[jrow]*x[5*idx[jrow]+1];
744:       sum3 += v[jrow]*x[5*idx[jrow]+2];
745:       sum4 += v[jrow]*x[5*idx[jrow]+3];
746:       sum5 += v[jrow]*x[5*idx[jrow]+4];
747:       jrow++;
748:      }
749:     y[5*i]   += sum1;
750:     y[5*i+1] += sum2;
751:     y[5*i+2] += sum3;
752:     y[5*i+3] += sum4;
753:     y[5*i+4] += sum5;
754:   }

756:   PetscLogFlops(10*a->nz);
757:   VecRestoreArray(xx,&x);
758:   VecRestoreArray(zz,&y);
759:   return(0);
760: }

764: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765: {
766:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
767:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
768:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
770:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

773:   if (yy != zz) {VecCopy(yy,zz);}
774:   VecGetArray(xx,&x);
775:   VecGetArray(zz,&y);
776: 
777:   for (i=0; i<m; i++) {
778:     idx    = a->j + a->i[i] ;
779:     v      = a->a + a->i[i] ;
780:     n      = a->i[i+1] - a->i[i];
781:     alpha1 = x[5*i];
782:     alpha2 = x[5*i+1];
783:     alpha3 = x[5*i+2];
784:     alpha4 = x[5*i+3];
785:     alpha5 = x[5*i+4];
786:     while (n-->0) {
787:       y[5*(*idx)]   += alpha1*(*v);
788:       y[5*(*idx)+1] += alpha2*(*v);
789:       y[5*(*idx)+2] += alpha3*(*v);
790:       y[5*(*idx)+3] += alpha4*(*v);
791:       y[5*(*idx)+4] += alpha5*(*v);
792:       idx++; v++;
793:     }
794:   }
795:   PetscLogFlops(10*a->nz);
796:   VecRestoreArray(xx,&x);
797:   VecRestoreArray(zz,&y);
798:   return(0);
799: }

801: /* ------------------------------------------------------------------------------*/
804: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805: {
806:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
807:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
808:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
810:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
811:   PetscInt       n,i,jrow,j;

814:   VecGetArray(xx,&x);
815:   VecGetArray(yy,&y);
816:   idx  = a->j;
817:   v    = a->a;
818:   ii   = a->i;

820:   for (i=0; i<m; i++) {
821:     jrow = ii[i];
822:     n    = ii[i+1] - jrow;
823:     sum1  = 0.0;
824:     sum2  = 0.0;
825:     sum3  = 0.0;
826:     sum4  = 0.0;
827:     sum5  = 0.0;
828:     sum6  = 0.0;
829:     for (j=0; j<n; j++) {
830:       sum1 += v[jrow]*x[6*idx[jrow]];
831:       sum2 += v[jrow]*x[6*idx[jrow]+1];
832:       sum3 += v[jrow]*x[6*idx[jrow]+2];
833:       sum4 += v[jrow]*x[6*idx[jrow]+3];
834:       sum5 += v[jrow]*x[6*idx[jrow]+4];
835:       sum6 += v[jrow]*x[6*idx[jrow]+5];
836:       jrow++;
837:      }
838:     y[6*i]   = sum1;
839:     y[6*i+1] = sum2;
840:     y[6*i+2] = sum3;
841:     y[6*i+3] = sum4;
842:     y[6*i+4] = sum5;
843:     y[6*i+5] = sum6;
844:   }

846:   PetscLogFlops(12*a->nz - 6*m);
847:   VecRestoreArray(xx,&x);
848:   VecRestoreArray(yy,&y);
849:   return(0);
850: }

854: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855: {
856:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
857:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
858:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
860:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

863:   VecSet(yy,zero);
864:   VecGetArray(xx,&x);
865:   VecGetArray(yy,&y);

867:   for (i=0; i<m; i++) {
868:     idx    = a->j + a->i[i] ;
869:     v      = a->a + a->i[i] ;
870:     n      = a->i[i+1] - a->i[i];
871:     alpha1 = x[6*i];
872:     alpha2 = x[6*i+1];
873:     alpha3 = x[6*i+2];
874:     alpha4 = x[6*i+3];
875:     alpha5 = x[6*i+4];
876:     alpha6 = x[6*i+5];
877:     while (n-->0) {
878:       y[6*(*idx)]   += alpha1*(*v);
879:       y[6*(*idx)+1] += alpha2*(*v);
880:       y[6*(*idx)+2] += alpha3*(*v);
881:       y[6*(*idx)+3] += alpha4*(*v);
882:       y[6*(*idx)+4] += alpha5*(*v);
883:       y[6*(*idx)+5] += alpha6*(*v);
884:       idx++; v++;
885:     }
886:   }
887:   PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
888:   VecRestoreArray(xx,&x);
889:   VecRestoreArray(yy,&y);
890:   return(0);
891: }

895: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896: {
897:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
898:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
899:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
901:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
902:   PetscInt       n,i,jrow,j;

905:   if (yy != zz) {VecCopy(yy,zz);}
906:   VecGetArray(xx,&x);
907:   VecGetArray(zz,&y);
908:   idx  = a->j;
909:   v    = a->a;
910:   ii   = a->i;

912:   for (i=0; i<m; i++) {
913:     jrow = ii[i];
914:     n    = ii[i+1] - jrow;
915:     sum1  = 0.0;
916:     sum2  = 0.0;
917:     sum3  = 0.0;
918:     sum4  = 0.0;
919:     sum5  = 0.0;
920:     sum6  = 0.0;
921:     for (j=0; j<n; j++) {
922:       sum1 += v[jrow]*x[6*idx[jrow]];
923:       sum2 += v[jrow]*x[6*idx[jrow]+1];
924:       sum3 += v[jrow]*x[6*idx[jrow]+2];
925:       sum4 += v[jrow]*x[6*idx[jrow]+3];
926:       sum5 += v[jrow]*x[6*idx[jrow]+4];
927:       sum6 += v[jrow]*x[6*idx[jrow]+5];
928:       jrow++;
929:      }
930:     y[6*i]   += sum1;
931:     y[6*i+1] += sum2;
932:     y[6*i+2] += sum3;
933:     y[6*i+3] += sum4;
934:     y[6*i+4] += sum5;
935:     y[6*i+5] += sum6;
936:   }

938:   PetscLogFlops(12*a->nz);
939:   VecRestoreArray(xx,&x);
940:   VecRestoreArray(zz,&y);
941:   return(0);
942: }

946: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
949:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
950:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
952:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

955:   if (yy != zz) {VecCopy(yy,zz);}
956:   VecGetArray(xx,&x);
957:   VecGetArray(zz,&y);
958: 
959:   for (i=0; i<m; i++) {
960:     idx    = a->j + a->i[i] ;
961:     v      = a->a + a->i[i] ;
962:     n      = a->i[i+1] - a->i[i];
963:     alpha1 = x[6*i];
964:     alpha2 = x[6*i+1];
965:     alpha3 = x[6*i+2];
966:     alpha4 = x[6*i+3];
967:     alpha5 = x[6*i+4];
968:     alpha6 = x[6*i+5];
969:     while (n-->0) {
970:       y[6*(*idx)]   += alpha1*(*v);
971:       y[6*(*idx)+1] += alpha2*(*v);
972:       y[6*(*idx)+2] += alpha3*(*v);
973:       y[6*(*idx)+3] += alpha4*(*v);
974:       y[6*(*idx)+4] += alpha5*(*v);
975:       y[6*(*idx)+5] += alpha6*(*v);
976:       idx++; v++;
977:     }
978:   }
979:   PetscLogFlops(12*a->nz);
980:   VecRestoreArray(xx,&x);
981:   VecRestoreArray(zz,&y);
982:   return(0);
983: }

985: /* ------------------------------------------------------------------------------*/
988: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989: {
990:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
991:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
992:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
995:   PetscInt       n,i,jrow,j;

998:   VecGetArray(xx,&x);
999:   VecGetArray(yy,&y);
1000:   idx  = a->j;
1001:   v    = a->a;
1002:   ii   = a->i;

1004:   for (i=0; i<m; i++) {
1005:     jrow = ii[i];
1006:     n    = ii[i+1] - jrow;
1007:     sum1  = 0.0;
1008:     sum2  = 0.0;
1009:     sum3  = 0.0;
1010:     sum4  = 0.0;
1011:     sum5  = 0.0;
1012:     sum6  = 0.0;
1013:     sum7  = 0.0;
1014:     for (j=0; j<n; j++) {
1015:       sum1 += v[jrow]*x[7*idx[jrow]];
1016:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022:       jrow++;
1023:      }
1024:     y[7*i]   = sum1;
1025:     y[7*i+1] = sum2;
1026:     y[7*i+2] = sum3;
1027:     y[7*i+3] = sum4;
1028:     y[7*i+4] = sum5;
1029:     y[7*i+5] = sum6;
1030:     y[7*i+6] = sum7;
1031:   }

1033:   PetscLogFlops(14*a->nz - 7*m);
1034:   VecRestoreArray(xx,&x);
1035:   VecRestoreArray(yy,&y);
1036:   return(0);
1037: }

1041: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042: {
1043:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1044:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1045:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1047:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1050:   VecSet(yy,zero);
1051:   VecGetArray(xx,&x);
1052:   VecGetArray(yy,&y);

1054:   for (i=0; i<m; i++) {
1055:     idx    = a->j + a->i[i] ;
1056:     v      = a->a + a->i[i] ;
1057:     n      = a->i[i+1] - a->i[i];
1058:     alpha1 = x[7*i];
1059:     alpha2 = x[7*i+1];
1060:     alpha3 = x[7*i+2];
1061:     alpha4 = x[7*i+3];
1062:     alpha5 = x[7*i+4];
1063:     alpha6 = x[7*i+5];
1064:     alpha7 = x[7*i+6];
1065:     while (n-->0) {
1066:       y[7*(*idx)]   += alpha1*(*v);
1067:       y[7*(*idx)+1] += alpha2*(*v);
1068:       y[7*(*idx)+2] += alpha3*(*v);
1069:       y[7*(*idx)+3] += alpha4*(*v);
1070:       y[7*(*idx)+4] += alpha5*(*v);
1071:       y[7*(*idx)+5] += alpha6*(*v);
1072:       y[7*(*idx)+6] += alpha7*(*v);
1073:       idx++; v++;
1074:     }
1075:   }
1076:   PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1077:   VecRestoreArray(xx,&x);
1078:   VecRestoreArray(yy,&y);
1079:   return(0);
1080: }

1084: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085: {
1086:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1087:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1088:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1090:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1091:   PetscInt       n,i,jrow,j;

1094:   if (yy != zz) {VecCopy(yy,zz);}
1095:   VecGetArray(xx,&x);
1096:   VecGetArray(zz,&y);
1097:   idx  = a->j;
1098:   v    = a->a;
1099:   ii   = a->i;

1101:   for (i=0; i<m; i++) {
1102:     jrow = ii[i];
1103:     n    = ii[i+1] - jrow;
1104:     sum1  = 0.0;
1105:     sum2  = 0.0;
1106:     sum3  = 0.0;
1107:     sum4  = 0.0;
1108:     sum5  = 0.0;
1109:     sum6  = 0.0;
1110:     sum7  = 0.0;
1111:     for (j=0; j<n; j++) {
1112:       sum1 += v[jrow]*x[7*idx[jrow]];
1113:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1114:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1115:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1116:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1117:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1118:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1119:       jrow++;
1120:      }
1121:     y[7*i]   += sum1;
1122:     y[7*i+1] += sum2;
1123:     y[7*i+2] += sum3;
1124:     y[7*i+3] += sum4;
1125:     y[7*i+4] += sum5;
1126:     y[7*i+5] += sum6;
1127:     y[7*i+6] += sum7;
1128:   }

1130:   PetscLogFlops(14*a->nz);
1131:   VecRestoreArray(xx,&x);
1132:   VecRestoreArray(zz,&y);
1133:   return(0);
1134: }

1138: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139: {
1140:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1141:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1142:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1144:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1147:   if (yy != zz) {VecCopy(yy,zz);}
1148:   VecGetArray(xx,&x);
1149:   VecGetArray(zz,&y);
1150:   for (i=0; i<m; i++) {
1151:     idx    = a->j + a->i[i] ;
1152:     v      = a->a + a->i[i] ;
1153:     n      = a->i[i+1] - a->i[i];
1154:     alpha1 = x[7*i];
1155:     alpha2 = x[7*i+1];
1156:     alpha3 = x[7*i+2];
1157:     alpha4 = x[7*i+3];
1158:     alpha5 = x[7*i+4];
1159:     alpha6 = x[7*i+5];
1160:     alpha7 = x[7*i+6];
1161:     while (n-->0) {
1162:       y[7*(*idx)]   += alpha1*(*v);
1163:       y[7*(*idx)+1] += alpha2*(*v);
1164:       y[7*(*idx)+2] += alpha3*(*v);
1165:       y[7*(*idx)+3] += alpha4*(*v);
1166:       y[7*(*idx)+4] += alpha5*(*v);
1167:       y[7*(*idx)+5] += alpha6*(*v);
1168:       y[7*(*idx)+6] += alpha7*(*v);
1169:       idx++; v++;
1170:     }
1171:   }
1172:   PetscLogFlops(14*a->nz);
1173:   VecRestoreArray(xx,&x);
1174:   VecRestoreArray(zz,&y);
1175:   return(0);
1176: }

1180: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181: {
1182:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1183:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1184:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1186:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1187:   PetscInt       n,i,jrow,j;

1190:   VecGetArray(xx,&x);
1191:   VecGetArray(yy,&y);
1192:   idx  = a->j;
1193:   v    = a->a;
1194:   ii   = a->i;

1196:   for (i=0; i<m; i++) {
1197:     jrow = ii[i];
1198:     n    = ii[i+1] - jrow;
1199:     sum1  = 0.0;
1200:     sum2  = 0.0;
1201:     sum3  = 0.0;
1202:     sum4  = 0.0;
1203:     sum5  = 0.0;
1204:     sum6  = 0.0;
1205:     sum7  = 0.0;
1206:     sum8  = 0.0;
1207:     for (j=0; j<n; j++) {
1208:       sum1 += v[jrow]*x[8*idx[jrow]];
1209:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1210:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1211:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1212:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1213:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1214:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1215:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1216:       jrow++;
1217:      }
1218:     y[8*i]   = sum1;
1219:     y[8*i+1] = sum2;
1220:     y[8*i+2] = sum3;
1221:     y[8*i+3] = sum4;
1222:     y[8*i+4] = sum5;
1223:     y[8*i+5] = sum6;
1224:     y[8*i+6] = sum7;
1225:     y[8*i+7] = sum8;
1226:   }

1228:   PetscLogFlops(16*a->nz - 8*m);
1229:   VecRestoreArray(xx,&x);
1230:   VecRestoreArray(yy,&y);
1231:   return(0);
1232: }

1236: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237: {
1238:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1239:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1240:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1242:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1245:   VecSet(yy,zero);
1246:   VecGetArray(xx,&x);
1247:   VecGetArray(yy,&y);

1249:   for (i=0; i<m; i++) {
1250:     idx    = a->j + a->i[i] ;
1251:     v      = a->a + a->i[i] ;
1252:     n      = a->i[i+1] - a->i[i];
1253:     alpha1 = x[8*i];
1254:     alpha2 = x[8*i+1];
1255:     alpha3 = x[8*i+2];
1256:     alpha4 = x[8*i+3];
1257:     alpha5 = x[8*i+4];
1258:     alpha6 = x[8*i+5];
1259:     alpha7 = x[8*i+6];
1260:     alpha8 = x[8*i+7];
1261:     while (n-->0) {
1262:       y[8*(*idx)]   += alpha1*(*v);
1263:       y[8*(*idx)+1] += alpha2*(*v);
1264:       y[8*(*idx)+2] += alpha3*(*v);
1265:       y[8*(*idx)+3] += alpha4*(*v);
1266:       y[8*(*idx)+4] += alpha5*(*v);
1267:       y[8*(*idx)+5] += alpha6*(*v);
1268:       y[8*(*idx)+6] += alpha7*(*v);
1269:       y[8*(*idx)+7] += alpha8*(*v);
1270:       idx++; v++;
1271:     }
1272:   }
1273:   PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1274:   VecRestoreArray(xx,&x);
1275:   VecRestoreArray(yy,&y);
1276:   return(0);
1277: }

1281: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282: {
1283:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1284:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1285:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1287:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1288:   PetscInt       n,i,jrow,j;

1291:   if (yy != zz) {VecCopy(yy,zz);}
1292:   VecGetArray(xx,&x);
1293:   VecGetArray(zz,&y);
1294:   idx  = a->j;
1295:   v    = a->a;
1296:   ii   = a->i;

1298:   for (i=0; i<m; i++) {
1299:     jrow = ii[i];
1300:     n    = ii[i+1] - jrow;
1301:     sum1  = 0.0;
1302:     sum2  = 0.0;
1303:     sum3  = 0.0;
1304:     sum4  = 0.0;
1305:     sum5  = 0.0;
1306:     sum6  = 0.0;
1307:     sum7  = 0.0;
1308:     sum8  = 0.0;
1309:     for (j=0; j<n; j++) {
1310:       sum1 += v[jrow]*x[8*idx[jrow]];
1311:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1312:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1313:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1314:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1315:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1316:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1317:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1318:       jrow++;
1319:      }
1320:     y[8*i]   += sum1;
1321:     y[8*i+1] += sum2;
1322:     y[8*i+2] += sum3;
1323:     y[8*i+3] += sum4;
1324:     y[8*i+4] += sum5;
1325:     y[8*i+5] += sum6;
1326:     y[8*i+6] += sum7;
1327:     y[8*i+7] += sum8;
1328:   }

1330:   PetscLogFlops(16*a->nz);
1331:   VecRestoreArray(xx,&x);
1332:   VecRestoreArray(zz,&y);
1333:   return(0);
1334: }

1338: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339: {
1340:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1341:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1342:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1344:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1347:   if (yy != zz) {VecCopy(yy,zz);}
1348:   VecGetArray(xx,&x);
1349:   VecGetArray(zz,&y);
1350:   for (i=0; i<m; i++) {
1351:     idx    = a->j + a->i[i] ;
1352:     v      = a->a + a->i[i] ;
1353:     n      = a->i[i+1] - a->i[i];
1354:     alpha1 = x[8*i];
1355:     alpha2 = x[8*i+1];
1356:     alpha3 = x[8*i+2];
1357:     alpha4 = x[8*i+3];
1358:     alpha5 = x[8*i+4];
1359:     alpha6 = x[8*i+5];
1360:     alpha7 = x[8*i+6];
1361:     alpha8 = x[8*i+7];
1362:     while (n-->0) {
1363:       y[8*(*idx)]   += alpha1*(*v);
1364:       y[8*(*idx)+1] += alpha2*(*v);
1365:       y[8*(*idx)+2] += alpha3*(*v);
1366:       y[8*(*idx)+3] += alpha4*(*v);
1367:       y[8*(*idx)+4] += alpha5*(*v);
1368:       y[8*(*idx)+5] += alpha6*(*v);
1369:       y[8*(*idx)+6] += alpha7*(*v);
1370:       y[8*(*idx)+7] += alpha8*(*v);
1371:       idx++; v++;
1372:     }
1373:   }
1374:   PetscLogFlops(16*a->nz);
1375:   VecRestoreArray(xx,&x);
1376:   VecRestoreArray(zz,&y);
1377:   return(0);
1378: }

1380: /* ------------------------------------------------------------------------------*/
1383: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384: {
1385:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1386:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1387:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1389:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1390:   PetscInt       n,i,jrow,j;

1393:   VecGetArray(xx,&x);
1394:   VecGetArray(yy,&y);
1395:   idx  = a->j;
1396:   v    = a->a;
1397:   ii   = a->i;

1399:   for (i=0; i<m; i++) {
1400:     jrow = ii[i];
1401:     n    = ii[i+1] - jrow;
1402:     sum1  = 0.0;
1403:     sum2  = 0.0;
1404:     sum3  = 0.0;
1405:     sum4  = 0.0;
1406:     sum5  = 0.0;
1407:     sum6  = 0.0;
1408:     sum7  = 0.0;
1409:     sum8  = 0.0;
1410:     sum9  = 0.0;
1411:     for (j=0; j<n; j++) {
1412:       sum1 += v[jrow]*x[9*idx[jrow]];
1413:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1414:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1415:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1416:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1417:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1418:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1419:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1420:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1421:       jrow++;
1422:      }
1423:     y[9*i]   = sum1;
1424:     y[9*i+1] = sum2;
1425:     y[9*i+2] = sum3;
1426:     y[9*i+3] = sum4;
1427:     y[9*i+4] = sum5;
1428:     y[9*i+5] = sum6;
1429:     y[9*i+6] = sum7;
1430:     y[9*i+7] = sum8;
1431:     y[9*i+8] = sum9;
1432:   }

1434:   PetscLogFlops(18*a->nz - 9*m);
1435:   VecRestoreArray(xx,&x);
1436:   VecRestoreArray(yy,&y);
1437:   return(0);
1438: }

1440: /* ------------------------------------------------------------------------------*/

1444: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445: {
1446:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1447:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1448:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1450:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1453:   VecSet(yy,zero);
1454:   VecGetArray(xx,&x);
1455:   VecGetArray(yy,&y);

1457:   for (i=0; i<m; i++) {
1458:     idx    = a->j + a->i[i] ;
1459:     v      = a->a + a->i[i] ;
1460:     n      = a->i[i+1] - a->i[i];
1461:     alpha1 = x[9*i];
1462:     alpha2 = x[9*i+1];
1463:     alpha3 = x[9*i+2];
1464:     alpha4 = x[9*i+3];
1465:     alpha5 = x[9*i+4];
1466:     alpha6 = x[9*i+5];
1467:     alpha7 = x[9*i+6];
1468:     alpha8 = x[9*i+7];
1469:     alpha9 = x[9*i+8];
1470:     while (n-->0) {
1471:       y[9*(*idx)]   += alpha1*(*v);
1472:       y[9*(*idx)+1] += alpha2*(*v);
1473:       y[9*(*idx)+2] += alpha3*(*v);
1474:       y[9*(*idx)+3] += alpha4*(*v);
1475:       y[9*(*idx)+4] += alpha5*(*v);
1476:       y[9*(*idx)+5] += alpha6*(*v);
1477:       y[9*(*idx)+6] += alpha7*(*v);
1478:       y[9*(*idx)+7] += alpha8*(*v);
1479:       y[9*(*idx)+8] += alpha9*(*v);
1480:       idx++; v++;
1481:     }
1482:   }
1483:   PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1484:   VecRestoreArray(xx,&x);
1485:   VecRestoreArray(yy,&y);
1486:   return(0);
1487: }

1491: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492: {
1493:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1494:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1495:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1497:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1498:   PetscInt       n,i,jrow,j;

1501:   if (yy != zz) {VecCopy(yy,zz);}
1502:   VecGetArray(xx,&x);
1503:   VecGetArray(zz,&y);
1504:   idx  = a->j;
1505:   v    = a->a;
1506:   ii   = a->i;

1508:   for (i=0; i<m; i++) {
1509:     jrow = ii[i];
1510:     n    = ii[i+1] - jrow;
1511:     sum1  = 0.0;
1512:     sum2  = 0.0;
1513:     sum3  = 0.0;
1514:     sum4  = 0.0;
1515:     sum5  = 0.0;
1516:     sum6  = 0.0;
1517:     sum7  = 0.0;
1518:     sum8  = 0.0;
1519:     sum9  = 0.0;
1520:     for (j=0; j<n; j++) {
1521:       sum1 += v[jrow]*x[9*idx[jrow]];
1522:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1523:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1524:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1525:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1526:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1527:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1528:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1529:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1530:       jrow++;
1531:      }
1532:     y[9*i]   += sum1;
1533:     y[9*i+1] += sum2;
1534:     y[9*i+2] += sum3;
1535:     y[9*i+3] += sum4;
1536:     y[9*i+4] += sum5;
1537:     y[9*i+5] += sum6;
1538:     y[9*i+6] += sum7;
1539:     y[9*i+7] += sum8;
1540:     y[9*i+8] += sum9;
1541:   }

1543:   PetscLogFlops(18*a->nz);
1544:   VecRestoreArray(xx,&x);
1545:   VecRestoreArray(zz,&y);
1546:   return(0);
1547: }

1551: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552: {
1553:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1554:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1555:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1557:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1560:   if (yy != zz) {VecCopy(yy,zz);}
1561:   VecGetArray(xx,&x);
1562:   VecGetArray(zz,&y);
1563:   for (i=0; i<m; i++) {
1564:     idx    = a->j + a->i[i] ;
1565:     v      = a->a + a->i[i] ;
1566:     n      = a->i[i+1] - a->i[i];
1567:     alpha1 = x[9*i];
1568:     alpha2 = x[9*i+1];
1569:     alpha3 = x[9*i+2];
1570:     alpha4 = x[9*i+3];
1571:     alpha5 = x[9*i+4];
1572:     alpha6 = x[9*i+5];
1573:     alpha7 = x[9*i+6];
1574:     alpha8 = x[9*i+7];
1575:     alpha9 = x[9*i+8];
1576:     while (n-->0) {
1577:       y[9*(*idx)]   += alpha1*(*v);
1578:       y[9*(*idx)+1] += alpha2*(*v);
1579:       y[9*(*idx)+2] += alpha3*(*v);
1580:       y[9*(*idx)+3] += alpha4*(*v);
1581:       y[9*(*idx)+4] += alpha5*(*v);
1582:       y[9*(*idx)+5] += alpha6*(*v);
1583:       y[9*(*idx)+6] += alpha7*(*v);
1584:       y[9*(*idx)+7] += alpha8*(*v);
1585:       y[9*(*idx)+8] += alpha9*(*v);
1586:       idx++; v++;
1587:     }
1588:   }
1589:   PetscLogFlops(18*a->nz);
1590:   VecRestoreArray(xx,&x);
1591:   VecRestoreArray(zz,&y);
1592:   return(0);
1593: }
1594: /*--------------------------------------------------------------------------------------------*/
1597: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598: {
1599:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1600:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1601:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1603:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1604:   PetscInt       n,i,jrow,j;

1607:   VecGetArray(xx,&x);
1608:   VecGetArray(yy,&y);
1609:   idx  = a->j;
1610:   v    = a->a;
1611:   ii   = a->i;

1613:   for (i=0; i<m; i++) {
1614:     jrow = ii[i];
1615:     n    = ii[i+1] - jrow;
1616:     sum1  = 0.0;
1617:     sum2  = 0.0;
1618:     sum3  = 0.0;
1619:     sum4  = 0.0;
1620:     sum5  = 0.0;
1621:     sum6  = 0.0;
1622:     sum7  = 0.0;
1623:     sum8  = 0.0;
1624:     sum9  = 0.0;
1625:     sum10 = 0.0;
1626:     for (j=0; j<n; j++) {
1627:       sum1  += v[jrow]*x[10*idx[jrow]];
1628:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1629:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1630:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1631:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1632:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1633:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1634:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1635:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1636:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1637:       jrow++;
1638:      }
1639:     y[10*i]   = sum1;
1640:     y[10*i+1] = sum2;
1641:     y[10*i+2] = sum3;
1642:     y[10*i+3] = sum4;
1643:     y[10*i+4] = sum5;
1644:     y[10*i+5] = sum6;
1645:     y[10*i+6] = sum7;
1646:     y[10*i+7] = sum8;
1647:     y[10*i+8] = sum9;
1648:     y[10*i+9] = sum10;
1649:   }

1651:   PetscLogFlops(20*a->nz - 10*m);
1652:   VecRestoreArray(xx,&x);
1653:   VecRestoreArray(yy,&y);
1654:   return(0);
1655: }

1659: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660: {
1661:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1662:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1663:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1665:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1666:   PetscInt       n,i,jrow,j;

1669:   if (yy != zz) {VecCopy(yy,zz);}
1670:   VecGetArray(xx,&x);
1671:   VecGetArray(zz,&y);
1672:   idx  = a->j;
1673:   v    = a->a;
1674:   ii   = a->i;

1676:   for (i=0; i<m; i++) {
1677:     jrow = ii[i];
1678:     n    = ii[i+1] - jrow;
1679:     sum1  = 0.0;
1680:     sum2  = 0.0;
1681:     sum3  = 0.0;
1682:     sum4  = 0.0;
1683:     sum5  = 0.0;
1684:     sum6  = 0.0;
1685:     sum7  = 0.0;
1686:     sum8  = 0.0;
1687:     sum9  = 0.0;
1688:     sum10 = 0.0;
1689:     for (j=0; j<n; j++) {
1690:       sum1  += v[jrow]*x[10*idx[jrow]];
1691:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1692:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1693:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1694:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1695:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1696:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1697:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1698:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1699:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1700:       jrow++;
1701:      }
1702:     y[10*i]   += sum1;
1703:     y[10*i+1] += sum2;
1704:     y[10*i+2] += sum3;
1705:     y[10*i+3] += sum4;
1706:     y[10*i+4] += sum5;
1707:     y[10*i+5] += sum6;
1708:     y[10*i+6] += sum7;
1709:     y[10*i+7] += sum8;
1710:     y[10*i+8] += sum9;
1711:     y[10*i+9] += sum10;
1712:   }

1714:   PetscLogFlops(20*a->nz - 10*m);
1715:   VecRestoreArray(xx,&x);
1716:   VecRestoreArray(yy,&y);
1717:   return(0);
1718: }

1722: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723: {
1724:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1725:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1726:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1728:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1731:   VecSet(yy,zero);
1732:   VecGetArray(xx,&x);
1733:   VecGetArray(yy,&y);

1735:   for (i=0; i<m; i++) {
1736:     idx    = a->j + a->i[i] ;
1737:     v      = a->a + a->i[i] ;
1738:     n      = a->i[i+1] - a->i[i];
1739:     alpha1 = x[10*i];
1740:     alpha2 = x[10*i+1];
1741:     alpha3 = x[10*i+2];
1742:     alpha4 = x[10*i+3];
1743:     alpha5 = x[10*i+4];
1744:     alpha6 = x[10*i+5];
1745:     alpha7 = x[10*i+6];
1746:     alpha8 = x[10*i+7];
1747:     alpha9 = x[10*i+8];
1748:     alpha10 = x[10*i+9];
1749:     while (n-->0) {
1750:       y[10*(*idx)]   += alpha1*(*v);
1751:       y[10*(*idx)+1] += alpha2*(*v);
1752:       y[10*(*idx)+2] += alpha3*(*v);
1753:       y[10*(*idx)+3] += alpha4*(*v);
1754:       y[10*(*idx)+4] += alpha5*(*v);
1755:       y[10*(*idx)+5] += alpha6*(*v);
1756:       y[10*(*idx)+6] += alpha7*(*v);
1757:       y[10*(*idx)+7] += alpha8*(*v);
1758:       y[10*(*idx)+8] += alpha9*(*v);
1759:       y[10*(*idx)+9] += alpha10*(*v);
1760:       idx++; v++;
1761:     }
1762:   }
1763:   PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1764:   VecRestoreArray(xx,&x);
1765:   VecRestoreArray(yy,&y);
1766:   return(0);
1767: }

1771: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772: {
1773:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1774:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1775:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1777:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1780:   if (yy != zz) {VecCopy(yy,zz);}
1781:   VecGetArray(xx,&x);
1782:   VecGetArray(zz,&y);
1783:   for (i=0; i<m; i++) {
1784:     idx    = a->j + a->i[i] ;
1785:     v      = a->a + a->i[i] ;
1786:     n      = a->i[i+1] - a->i[i];
1787:     alpha1 = x[10*i];
1788:     alpha2 = x[10*i+1];
1789:     alpha3 = x[10*i+2];
1790:     alpha4 = x[10*i+3];
1791:     alpha5 = x[10*i+4];
1792:     alpha6 = x[10*i+5];
1793:     alpha7 = x[10*i+6];
1794:     alpha8 = x[10*i+7];
1795:     alpha9 = x[10*i+8];
1796:     alpha10 = x[10*i+9];
1797:     while (n-->0) {
1798:       y[10*(*idx)]   += alpha1*(*v);
1799:       y[10*(*idx)+1] += alpha2*(*v);
1800:       y[10*(*idx)+2] += alpha3*(*v);
1801:       y[10*(*idx)+3] += alpha4*(*v);
1802:       y[10*(*idx)+4] += alpha5*(*v);
1803:       y[10*(*idx)+5] += alpha6*(*v);
1804:       y[10*(*idx)+6] += alpha7*(*v);
1805:       y[10*(*idx)+7] += alpha8*(*v);
1806:       y[10*(*idx)+8] += alpha9*(*v);
1807:       y[10*(*idx)+9] += alpha10*(*v);
1808:       idx++; v++;
1809:     }
1810:   }
1811:   PetscLogFlops(20*a->nz);
1812:   VecRestoreArray(xx,&x);
1813:   VecRestoreArray(zz,&y);
1814:   return(0);
1815: }


1818: /*--------------------------------------------------------------------------------------------*/
1821: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1822: {
1823:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1824:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1825:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1826:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1828:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1829:   PetscInt       n,i,jrow,j;

1832:   VecGetArray(xx,&x);
1833:   VecGetArray(yy,&y);
1834:   idx  = a->j;
1835:   v    = a->a;
1836:   ii   = a->i;

1838:   for (i=0; i<m; i++) {
1839:     jrow = ii[i];
1840:     n    = ii[i+1] - jrow;
1841:     sum1  = 0.0;
1842:     sum2  = 0.0;
1843:     sum3  = 0.0;
1844:     sum4  = 0.0;
1845:     sum5  = 0.0;
1846:     sum6  = 0.0;
1847:     sum7  = 0.0;
1848:     sum8  = 0.0;
1849:     sum9  = 0.0;
1850:     sum10 = 0.0;
1851:     sum11 = 0.0;
1852:     sum12 = 0.0;
1853:     sum13 = 0.0;
1854:     sum14 = 0.0;
1855:     sum15 = 0.0;
1856:     sum16 = 0.0;
1857:     for (j=0; j<n; j++) {
1858:       sum1  += v[jrow]*x[16*idx[jrow]];
1859:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1860:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1861:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1862:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1863:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1864:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1865:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1866:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1867:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1868:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1869:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1870:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1871:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1872:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1873:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1874:       jrow++;
1875:      }
1876:     y[16*i]    = sum1;
1877:     y[16*i+1]  = sum2;
1878:     y[16*i+2]  = sum3;
1879:     y[16*i+3]  = sum4;
1880:     y[16*i+4]  = sum5;
1881:     y[16*i+5]  = sum6;
1882:     y[16*i+6]  = sum7;
1883:     y[16*i+7]  = sum8;
1884:     y[16*i+8]  = sum9;
1885:     y[16*i+9]  = sum10;
1886:     y[16*i+10] = sum11;
1887:     y[16*i+11] = sum12;
1888:     y[16*i+12] = sum13;
1889:     y[16*i+13] = sum14;
1890:     y[16*i+14] = sum15;
1891:     y[16*i+15] = sum16;
1892:   }

1894:   PetscLogFlops(32*a->nz - 16*m);
1895:   VecRestoreArray(xx,&x);
1896:   VecRestoreArray(yy,&y);
1897:   return(0);
1898: }

1902: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1903: {
1904:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1905:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1906:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1907:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1909:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1912:   VecSet(yy,zero);
1913:   VecGetArray(xx,&x);
1914:   VecGetArray(yy,&y);

1916:   for (i=0; i<m; i++) {
1917:     idx    = a->j + a->i[i] ;
1918:     v      = a->a + a->i[i] ;
1919:     n      = a->i[i+1] - a->i[i];
1920:     alpha1  = x[16*i];
1921:     alpha2  = x[16*i+1];
1922:     alpha3  = x[16*i+2];
1923:     alpha4  = x[16*i+3];
1924:     alpha5  = x[16*i+4];
1925:     alpha6  = x[16*i+5];
1926:     alpha7  = x[16*i+6];
1927:     alpha8  = x[16*i+7];
1928:     alpha9  = x[16*i+8];
1929:     alpha10 = x[16*i+9];
1930:     alpha11 = x[16*i+10];
1931:     alpha12 = x[16*i+11];
1932:     alpha13 = x[16*i+12];
1933:     alpha14 = x[16*i+13];
1934:     alpha15 = x[16*i+14];
1935:     alpha16 = x[16*i+15];
1936:     while (n-->0) {
1937:       y[16*(*idx)]    += alpha1*(*v);
1938:       y[16*(*idx)+1]  += alpha2*(*v);
1939:       y[16*(*idx)+2]  += alpha3*(*v);
1940:       y[16*(*idx)+3]  += alpha4*(*v);
1941:       y[16*(*idx)+4]  += alpha5*(*v);
1942:       y[16*(*idx)+5]  += alpha6*(*v);
1943:       y[16*(*idx)+6]  += alpha7*(*v);
1944:       y[16*(*idx)+7]  += alpha8*(*v);
1945:       y[16*(*idx)+8]  += alpha9*(*v);
1946:       y[16*(*idx)+9]  += alpha10*(*v);
1947:       y[16*(*idx)+10] += alpha11*(*v);
1948:       y[16*(*idx)+11] += alpha12*(*v);
1949:       y[16*(*idx)+12] += alpha13*(*v);
1950:       y[16*(*idx)+13] += alpha14*(*v);
1951:       y[16*(*idx)+14] += alpha15*(*v);
1952:       y[16*(*idx)+15] += alpha16*(*v);
1953:       idx++; v++;
1954:     }
1955:   }
1956:   PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1957:   VecRestoreArray(xx,&x);
1958:   VecRestoreArray(yy,&y);
1959:   return(0);
1960: }

1964: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1965: {
1966:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1967:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1968:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1969:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1971:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1972:   PetscInt       n,i,jrow,j;

1975:   if (yy != zz) {VecCopy(yy,zz);}
1976:   VecGetArray(xx,&x);
1977:   VecGetArray(zz,&y);
1978:   idx  = a->j;
1979:   v    = a->a;
1980:   ii   = a->i;

1982:   for (i=0; i<m; i++) {
1983:     jrow = ii[i];
1984:     n    = ii[i+1] - jrow;
1985:     sum1  = 0.0;
1986:     sum2  = 0.0;
1987:     sum3  = 0.0;
1988:     sum4  = 0.0;
1989:     sum5  = 0.0;
1990:     sum6  = 0.0;
1991:     sum7  = 0.0;
1992:     sum8  = 0.0;
1993:     sum9  = 0.0;
1994:     sum10 = 0.0;
1995:     sum11 = 0.0;
1996:     sum12 = 0.0;
1997:     sum13 = 0.0;
1998:     sum14 = 0.0;
1999:     sum15 = 0.0;
2000:     sum16 = 0.0;
2001:     for (j=0; j<n; j++) {
2002:       sum1  += v[jrow]*x[16*idx[jrow]];
2003:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2004:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2005:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2006:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2007:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2008:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2009:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2010:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2011:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2012:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2013:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2014:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2015:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2016:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2017:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2018:       jrow++;
2019:      }
2020:     y[16*i]    += sum1;
2021:     y[16*i+1]  += sum2;
2022:     y[16*i+2]  += sum3;
2023:     y[16*i+3]  += sum4;
2024:     y[16*i+4]  += sum5;
2025:     y[16*i+5]  += sum6;
2026:     y[16*i+6]  += sum7;
2027:     y[16*i+7]  += sum8;
2028:     y[16*i+8]  += sum9;
2029:     y[16*i+9]  += sum10;
2030:     y[16*i+10] += sum11;
2031:     y[16*i+11] += sum12;
2032:     y[16*i+12] += sum13;
2033:     y[16*i+13] += sum14;
2034:     y[16*i+14] += sum15;
2035:     y[16*i+15] += sum16;
2036:   }

2038:   PetscLogFlops(32*a->nz);
2039:   VecRestoreArray(xx,&x);
2040:   VecRestoreArray(zz,&y);
2041:   return(0);
2042: }

2046: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2047: {
2048:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2049:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2050:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2051:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2053:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2056:   if (yy != zz) {VecCopy(yy,zz);}
2057:   VecGetArray(xx,&x);
2058:   VecGetArray(zz,&y);
2059:   for (i=0; i<m; i++) {
2060:     idx    = a->j + a->i[i] ;
2061:     v      = a->a + a->i[i] ;
2062:     n      = a->i[i+1] - a->i[i];
2063:     alpha1 = x[16*i];
2064:     alpha2 = x[16*i+1];
2065:     alpha3 = x[16*i+2];
2066:     alpha4 = x[16*i+3];
2067:     alpha5 = x[16*i+4];
2068:     alpha6 = x[16*i+5];
2069:     alpha7 = x[16*i+6];
2070:     alpha8 = x[16*i+7];
2071:     alpha9  = x[16*i+8];
2072:     alpha10 = x[16*i+9];
2073:     alpha11 = x[16*i+10];
2074:     alpha12 = x[16*i+11];
2075:     alpha13 = x[16*i+12];
2076:     alpha14 = x[16*i+13];
2077:     alpha15 = x[16*i+14];
2078:     alpha16 = x[16*i+15];
2079:     while (n-->0) {
2080:       y[16*(*idx)]   += alpha1*(*v);
2081:       y[16*(*idx)+1] += alpha2*(*v);
2082:       y[16*(*idx)+2] += alpha3*(*v);
2083:       y[16*(*idx)+3] += alpha4*(*v);
2084:       y[16*(*idx)+4] += alpha5*(*v);
2085:       y[16*(*idx)+5] += alpha6*(*v);
2086:       y[16*(*idx)+6] += alpha7*(*v);
2087:       y[16*(*idx)+7] += alpha8*(*v);
2088:       y[16*(*idx)+8]  += alpha9*(*v);
2089:       y[16*(*idx)+9]  += alpha10*(*v);
2090:       y[16*(*idx)+10] += alpha11*(*v);
2091:       y[16*(*idx)+11] += alpha12*(*v);
2092:       y[16*(*idx)+12] += alpha13*(*v);
2093:       y[16*(*idx)+13] += alpha14*(*v);
2094:       y[16*(*idx)+14] += alpha15*(*v);
2095:       y[16*(*idx)+15] += alpha16*(*v);
2096:       idx++; v++;
2097:     }
2098:   }
2099:   PetscLogFlops(32*a->nz);
2100:   VecRestoreArray(xx,&x);
2101:   VecRestoreArray(zz,&y);
2102:   return(0);
2103: }

2105: /*===================================================================================*/
2108: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2109: {
2110:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2114:   /* start the scatter */
2115:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2116:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2117:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2118:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2119:   return(0);
2120: }

2124: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2125: {
2126:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2130:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2131:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2132:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2133:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2134:   return(0);
2135: }

2139: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2140: {
2141:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2145:   /* start the scatter */
2146:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2147:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2148:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2149:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2150:   return(0);
2151: }

2155: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2156: {
2157:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2161:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2162:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2163:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2164:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2165:   return(0);
2166: }

2168: /* ----------------------------------------------------------------*/
2171: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2172: {
2173:   /* This routine requires testing -- but it's getting better. */
2174:   PetscErrorCode     ierr;
2175:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2176:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2177:   Mat                P=pp->AIJ;
2178:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2179:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2180:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2181:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2182:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2183:   MatScalar          *ca;

2186:   /* Start timer */

2189:   /* Get ij structure of P^T */
2190:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2192:   cn = pn*ppdof;
2193:   /* Allocate ci array, arrays for fill computation and */
2194:   /* free space for accumulating nonzero column info */
2195:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2196:   ci[0] = 0;

2198:   /* Work arrays for rows of P^T*A */
2199:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2200:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2201:   ptasparserow = ptadenserow  + an;
2202:   denserow     = ptasparserow + an;
2203:   sparserow    = denserow     + cn;

2205:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2206:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2207:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2208:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2209:   current_space = free_space;

2211:   /* Determine symbolic info for each row of C: */
2212:   for (i=0;i<pn;i++) {
2213:     ptnzi  = pti[i+1] - pti[i];
2214:     ptJ    = ptj + pti[i];
2215:     for (dof=0;dof<ppdof;dof++) {
2216:       ptanzi = 0;
2217:       /* Determine symbolic row of PtA: */
2218:       for (j=0;j<ptnzi;j++) {
2219:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2220:         arow = ptJ[j]*ppdof + dof;
2221:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2222:         anzj = ai[arow+1] - ai[arow];
2223:         ajj  = aj + ai[arow];
2224:         for (k=0;k<anzj;k++) {
2225:           if (!ptadenserow[ajj[k]]) {
2226:             ptadenserow[ajj[k]]    = -1;
2227:             ptasparserow[ptanzi++] = ajj[k];
2228:           }
2229:         }
2230:       }
2231:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2232:       ptaj = ptasparserow;
2233:       cnzi   = 0;
2234:       for (j=0;j<ptanzi;j++) {
2235:         /* Get offset within block of P */
2236:         pshift = *ptaj%ppdof;
2237:         /* Get block row of P */
2238:         prow = (*ptaj++)/ppdof; /* integer division */
2239:         /* P has same number of nonzeros per row as the compressed form */
2240:         pnzj = pi[prow+1] - pi[prow];
2241:         pjj  = pj + pi[prow];
2242:         for (k=0;k<pnzj;k++) {
2243:           /* Locations in C are shifted by the offset within the block */
2244:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2245:           if (!denserow[pjj[k]*ppdof+pshift]) {
2246:             denserow[pjj[k]*ppdof+pshift] = -1;
2247:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2248:           }
2249:         }
2250:       }

2252:       /* sort sparserow */
2253:       PetscSortInt(cnzi,sparserow);
2254: 
2255:       /* If free space is not available, make more free space */
2256:       /* Double the amount of total space in the list */
2257:       if (current_space->local_remaining<cnzi) {
2258:         PetscFreeSpaceGet(current_space->total_array_size,&current_space);
2259:       }

2261:       /* Copy data into free space, and zero out denserows */
2262:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2263:       current_space->array           += cnzi;
2264:       current_space->local_used      += cnzi;
2265:       current_space->local_remaining -= cnzi;

2267:       for (j=0;j<ptanzi;j++) {
2268:         ptadenserow[ptasparserow[j]] = 0;
2269:       }
2270:       for (j=0;j<cnzi;j++) {
2271:         denserow[sparserow[j]] = 0;
2272:       }
2273:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2274:       /*        For now, we will recompute what is needed. */
2275:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2276:     }
2277:   }
2278:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2279:   /* Allocate space for cj, initialize cj, and */
2280:   /* destroy list of free space and other temporary array(s) */
2281:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2282:   PetscFreeSpaceContiguous(&free_space,cj);
2283:   PetscFree(ptadenserow);
2284: 
2285:   /* Allocate space for ca */
2286:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2287:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2288: 
2289:   /* put together the new matrix */
2290:   MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);

2292:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2293:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2294:   c          = (Mat_SeqAIJ *)((*C)->data);
2295:   c->free_a  = PETSC_TRUE;
2296:   c->free_ij = PETSC_TRUE;
2297:   c->nonew   = 0;

2299:   /* Clean up. */
2300:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2303:   return(0);
2304: }

2308: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2309: {
2310:   /* This routine requires testing -- first draft only */
2312:   PetscInt       flops=0;
2313:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2314:   Mat            P=pp->AIJ;
2315:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2316:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2317:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2318:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2319:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2320:   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2321:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2322:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2325:   /* Allocate temporary array for storage of one row of A*P */
2326:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2327:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2329:   apj      = (PetscInt *)(apa + cn);
2330:   apjdense = apj + cn;

2332:   /* Clear old values in C */
2333:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2335:   for (i=0;i<am;i++) {
2336:     /* Form sparse row of A*P */
2337:     anzi  = ai[i+1] - ai[i];
2338:     apnzj = 0;
2339:     for (j=0;j<anzi;j++) {
2340:       /* Get offset within block of P */
2341:       pshift = *aj%ppdof;
2342:       /* Get block row of P */
2343:       prow   = *aj++/ppdof; /* integer division */
2344:       pnzj = pi[prow+1] - pi[prow];
2345:       pjj  = pj + pi[prow];
2346:       paj  = pa + pi[prow];
2347:       for (k=0;k<pnzj;k++) {
2348:         poffset = pjj[k]*ppdof+pshift;
2349:         if (!apjdense[poffset]) {
2350:           apjdense[poffset] = -1;
2351:           apj[apnzj++]      = poffset;
2352:         }
2353:         apa[poffset] += (*aa)*paj[k];
2354:       }
2355:       flops += 2*pnzj;
2356:       aa++;
2357:     }

2359:     /* Sort the j index array for quick sparse axpy. */
2360:     /* Note: a array does not need sorting as it is in dense storage locations. */
2361:     PetscSortInt(apnzj,apj);

2363:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2364:     prow    = i/ppdof; /* integer division */
2365:     pshift  = i%ppdof;
2366:     poffset = pi[prow];
2367:     pnzi = pi[prow+1] - poffset;
2368:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2369:     pJ   = pj+poffset;
2370:     pA   = pa+poffset;
2371:     for (j=0;j<pnzi;j++) {
2372:       crow   = (*pJ)*ppdof+pshift;
2373:       cjj    = cj + ci[crow];
2374:       caj    = ca + ci[crow];
2375:       pJ++;
2376:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2377:       for (k=0,nextap=0;nextap<apnzj;k++) {
2378:         if (cjj[k]==apj[nextap]) {
2379:           caj[k] += (*pA)*apa[apj[nextap++]];
2380:         }
2381:       }
2382:       flops += 2*apnzj;
2383:       pA++;
2384:     }

2386:     /* Zero the current row info for A*P */
2387:     for (j=0;j<apnzj;j++) {
2388:       apa[apj[j]]      = 0.;
2389:       apjdense[apj[j]] = 0;
2390:     }
2391:   }

2393:   /* Assemble the final matrix and clean up */
2394:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2395:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2396:   PetscFree(apa);
2397:   PetscLogFlops(flops);
2398:   return(0);
2399: }

2403: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2404: {
2405:   PetscErrorCode    ierr;

2408:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2409:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2410:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2411:   return(0);
2412: }

2416: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2417: {
2419:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2420:   return(0);
2421: }

2426: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2427: {
2428:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2429:   Mat               a = b->AIJ,B;
2430:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2431:   PetscErrorCode    ierr;
2432:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2433:   PetscInt          *cols;
2434:   PetscScalar       *vals;

2437:   MatGetSize(a,&m,&n);
2438:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2439:   for (i=0; i<m; i++) {
2440:     nmax = PetscMax(nmax,aij->ilen[i]);
2441:     for (j=0; j<dof; j++) {
2442:       ilen[dof*i+j] = aij->ilen[i];
2443:     }
2444:   }
2445:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2446:   MatSetOption(B,MAT_COLUMNS_SORTED);
2447:   PetscFree(ilen);
2448:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2449:   ii   = 0;
2450:   for (i=0; i<m; i++) {
2451:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2452:     for (j=0; j<dof; j++) {
2453:       for (k=0; k<ncols; k++) {
2454:         icols[k] = dof*cols[k]+j;
2455:       }
2456:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2457:       ii++;
2458:     }
2459:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2460:   }
2461:   PetscFree(icols);
2462:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2463:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2465:   if (reuse == MAT_REUSE_MATRIX) {
2466:     MatHeaderReplace(A,B);
2467:   } else {
2468:     *newmat = B;
2469:   }
2470:   return(0);
2471: }

2474:  #include src/mat/impls/aij/mpi/mpiaij.h

2479: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2480: {
2481:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2482:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2483:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2484:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2485:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2486:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2487:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2488:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2489:   PetscInt          rstart,cstart,*garray,ii,k;
2490:   PetscErrorCode    ierr;
2491:   PetscScalar       *vals,*ovals;

2494:   PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2495:   for (i=0; i<A->rmap.n/dof; i++) {
2496:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2497:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2498:     for (j=0; j<dof; j++) {
2499:       dnz[dof*i+j] = AIJ->ilen[i];
2500:       onz[dof*i+j] = OAIJ->ilen[i];
2501:     }
2502:   }
2503:   MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2504:   MatSetOption(B,MAT_COLUMNS_SORTED);
2505:   PetscFree2(dnz,onz);

2507:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2508:   rstart = dof*maij->A->rmap.rstart;
2509:   cstart = dof*maij->A->cmap.rstart;
2510:   garray = mpiaij->garray;

2512:   ii = rstart;
2513:   for (i=0; i<A->rmap.n/dof; i++) {
2514:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2515:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2516:     for (j=0; j<dof; j++) {
2517:       for (k=0; k<ncols; k++) {
2518:         icols[k] = cstart + dof*cols[k]+j;
2519:       }
2520:       for (k=0; k<oncols; k++) {
2521:         oicols[k] = dof*garray[ocols[k]]+j;
2522:       }
2523:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2524:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2525:       ii++;
2526:     }
2527:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2528:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2529:   }
2530:   PetscFree2(icols,oicols);

2532:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2533:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2535:   if (reuse == MAT_REUSE_MATRIX) {
2536:     PetscInt refct = A->refct; /* save A->refct */
2537:     A->refct = 1;
2538:     MatHeaderReplace(A,B);
2539:     A->refct = refct; /* restore A->refct */
2540:   } else {
2541:     *newmat = B;
2542:   }
2543:   return(0);
2544: }


2548: /* ---------------------------------------------------------------------------------- */
2549: /*MC
2550:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2551:   operations for multicomponent problems.  It interpolates each component the same
2552:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2553:   and MATMPIAIJ for distributed matrices.

2555:   Operations provided:
2556: + MatMult
2557: . MatMultTranspose
2558: . MatMultAdd
2559: . MatMultTransposeAdd
2560: - MatView

2562:   Level: advanced

2564: M*/
2567: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2568: {
2570:   PetscMPIInt    size;
2571:   PetscInt       n;
2572:   Mat_MPIMAIJ    *b;
2573:   Mat            B;

2576:   PetscObjectReference((PetscObject)A);

2578:   if (dof == 1) {
2579:     *maij = A;
2580:   } else {
2581:     MatCreate(A->comm,&B);
2582:     MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2583:     B->assembled    = PETSC_TRUE;

2585:     MPI_Comm_size(A->comm,&size);
2586:     if (size == 1) {
2587:       MatSetType(B,MATSEQMAIJ);
2588:       B->ops->destroy = MatDestroy_SeqMAIJ;
2589:       B->ops->view    = MatView_SeqMAIJ;
2590:       b      = (Mat_MPIMAIJ*)B->data;
2591:       b->dof = dof;
2592:       b->AIJ = A;
2593:       if (dof == 2) {
2594:         B->ops->mult             = MatMult_SeqMAIJ_2;
2595:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2596:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2597:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2598:       } else if (dof == 3) {
2599:         B->ops->mult             = MatMult_SeqMAIJ_3;
2600:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2601:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2602:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2603:       } else if (dof == 4) {
2604:         B->ops->mult             = MatMult_SeqMAIJ_4;
2605:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2606:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2607:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2608:       } else if (dof == 5) {
2609:         B->ops->mult             = MatMult_SeqMAIJ_5;
2610:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2611:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2612:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2613:       } else if (dof == 6) {
2614:         B->ops->mult             = MatMult_SeqMAIJ_6;
2615:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2616:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2617:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2618:       } else if (dof == 7) {
2619:         B->ops->mult             = MatMult_SeqMAIJ_7;
2620:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2621:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2622:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2623:       } else if (dof == 8) {
2624:         B->ops->mult             = MatMult_SeqMAIJ_8;
2625:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2626:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2627:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2628:       } else if (dof == 9) {
2629:         B->ops->mult             = MatMult_SeqMAIJ_9;
2630:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2631:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2632:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2633:       } else if (dof == 10) {
2634:         B->ops->mult             = MatMult_SeqMAIJ_10;
2635:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2636:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2637:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2638:       } else if (dof == 16) {
2639:         B->ops->mult             = MatMult_SeqMAIJ_16;
2640:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2641:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2642:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2643:       } else {
2644:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2645:       }
2646:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2647:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2648:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2649:     } else {
2650:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2651:       IS         from,to;
2652:       Vec        gvec;
2653:       PetscInt   *garray,i;

2655:       MatSetType(B,MATMPIMAIJ);
2656:       B->ops->destroy = MatDestroy_MPIMAIJ;
2657:       B->ops->view    = MatView_MPIMAIJ;
2658:       b      = (Mat_MPIMAIJ*)B->data;
2659:       b->dof = dof;
2660:       b->A   = A;
2661:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2662:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2664:       VecGetSize(mpiaij->lvec,&n);
2665:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2666:       VecSetBlockSize(b->w,dof);

2668:       /* create two temporary Index sets for build scatter gather */
2669:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2670:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2671:       ISCreateBlock(A->comm,dof,n,garray,&from);
2672:       PetscFree(garray);
2673:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2675:       /* create temporary global vector to generate scatter context */
2676:       VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2677:       VecSetBlockSize(gvec,dof);

2679:       /* generate the scatter context */
2680:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

2682:       ISDestroy(from);
2683:       ISDestroy(to);
2684:       VecDestroy(gvec);

2686:       B->ops->mult             = MatMult_MPIMAIJ_dof;
2687:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2688:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2689:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2690:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
2691:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
2692:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2693:     }
2694:     *maij = B;
2695:     MatView_Private(B);
2696:   }
2697:   return(0);
2698: }