Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: /*@C
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
 51:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 54:   if (a->idiagvalid) return(0);
 55:   MatMarkDiagonal_SeqBAIJ(A);
 56:   diag_offset = a->diag;
 57:   if (!a->idiag) {
 58:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 2:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag   = v + 4*diag_offset[i];
 67:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 69:         Kernel_A_gets_inverse_A_2(diag);
 70:         diag    += 4;
 71:         mdiag   += 4;
 72:       }
 73:       break;
 74:     case 3:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag    = v + 9*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:         diag[8]  = odiag[8];
 80:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 81:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 82:         mdiag[8] = odiag[8];
 83:         Kernel_A_gets_inverse_A_3(diag);
 84:         diag    += 9;
 85:         mdiag   += 9;
 86:       }
 87:       break;
 88:     case 4:
 89:       for (i=0; i<mbs; i++) {
 90:         odiag  = v + 16*diag_offset[i];
 91:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:         Kernel_A_gets_inverse_A_4(diag);
 94:         diag  += 16;
 95:         mdiag += 16;
 96:       }
 97:       break;
 98:     case 5:
 99:       for (i=0; i<mbs; i++) {
100:         odiag = v + 25*diag_offset[i];
101:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_5(diag);
104:         diag  += 25;
105:         mdiag += 25;
106:       }
107:       break;
108:     default:
109:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110:   }
111:   a->idiagvalid = PETSC_TRUE;
112:   return(0);
113: }

118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121:   PetscScalar        *x,x1,x2,s1,s2;
122:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123:   PetscErrorCode     ierr;
124:   PetscInt           m = a->mbs,i,i2,nz,idx;
125:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

128:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129:   its = its*lits;
130:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

136:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

138:   diag  = a->diag;
139:   idiag = a->idiag;
140:   VecGetArray(xx,&x);
141:   VecGetArray(bb,(PetscScalar**)&b);

143:   if (flag & SOR_ZERO_INITIAL_GUESS) {
144:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147:       i2     = 2;
148:       idiag += 4;
149:       for (i=1; i<m; i++) {
150:         v     = aa + 4*ai[i];
151:         vi    = aj + ai[i];
152:         nz    = diag[i] - ai[i];
153:         s1    = b[i2]; s2 = b[i2+1];
154:         while (nz--) {
155:           idx  = 2*(*vi++);
156:           x1   = x[idx]; x2 = x[1+idx];
157:           s1  -= v[0]*x1 + v[2]*x2;
158:           s2  -= v[1]*x1 + v[3]*x2;
159:           v   += 4;
160:         }
161:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163:         idiag   += 4;
164:         i2      += 2;
165:       }
166:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167:       PetscLogFlops(4*(a->nz));
168:     }
169:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171:       i2    = 0;
172:       mdiag = a->idiag+4*a->mbs;
173:       for (i=0; i<m; i++) {
174:         x1      = x[i2]; x2 = x[i2+1];
175:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177:         mdiag  += 4;
178:         i2     += 2;
179:       }
180:       PetscLogFlops(6*m);
181:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183:     }
184:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185:       idiag   = a->idiag+4*a->mbs - 4;
186:       i2      = 2*m - 2;
187:       x1      = x[i2]; x2 = x[i2+1];
188:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190:       idiag -= 4;
191:       i2    -= 2;
192:       for (i=m-2; i>=0; i--) {
193:         v     = aa + 4*(diag[i]+1);
194:         vi    = aj + diag[i] + 1;
195:         nz    = ai[i+1] - diag[i] - 1;
196:         s1    = x[i2]; s2 = x[i2+1];
197:         while (nz--) {
198:           idx  = 2*(*vi++);
199:           x1   = x[idx]; x2 = x[1+idx];
200:           s1  -= v[0]*x1 + v[2]*x2;
201:           s2  -= v[1]*x1 + v[3]*x2;
202:           v   += 4;
203:         }
204:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206:         idiag   -= 4;
207:         i2      -= 2;
208:       }
209:       PetscLogFlops(4*(a->nz));
210:     }
211:   } else {
212:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213:   }
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(bb,(PetscScalar**)&b);
216:   return(0);
217: }

221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226:   PetscErrorCode     ierr;
227:   PetscInt           m = a->mbs,i,i2,nz,idx;
228:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

231:   its = its*lits;
232:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

238:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

240:   diag  = a->diag;
241:   idiag = a->idiag;
242:   VecGetArray(xx,&x);
243:   VecGetArray(bb,(PetscScalar**)&b);

245:   if (flag & SOR_ZERO_INITIAL_GUESS) {
246:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250:       i2     = 3;
251:       idiag += 9;
252:       for (i=1; i<m; i++) {
253:         v     = aa + 9*ai[i];
254:         vi    = aj + ai[i];
255:         nz    = diag[i] - ai[i];
256:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257:         while (nz--) {
258:           idx  = 3*(*vi++);
259:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263:           v   += 9;
264:         }
265:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268:         idiag   += 9;
269:         i2      += 3;
270:       }
271:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272:       PetscLogFlops(9*(a->nz));
273:     }
274:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276:       i2    = 0;
277:       mdiag = a->idiag+9*a->mbs;
278:       for (i=0; i<m; i++) {
279:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283:         mdiag  += 9;
284:         i2     += 3;
285:       }
286:       PetscLogFlops(15*m);
287:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
289:     }
290:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291:       idiag   = a->idiag+9*a->mbs - 9;
292:       i2      = 3*m - 3;
293:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297:       idiag -= 9;
298:       i2    -= 3;
299:       for (i=m-2; i>=0; i--) {
300:         v     = aa + 9*(diag[i]+1);
301:         vi    = aj + diag[i] + 1;
302:         nz    = ai[i+1] - diag[i] - 1;
303:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304:         while (nz--) {
305:           idx  = 3*(*vi++);
306:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310:           v   += 9;
311:         }
312:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315:         idiag   -= 9;
316:         i2      -= 3;
317:       }
318:       PetscLogFlops(9*(a->nz));
319:     }
320:   } else {
321:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322:   }
323:   VecRestoreArray(xx,&x);
324:   VecRestoreArray(bb,(PetscScalar**)&b);
325:   return(0);
326: }

330: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331: {
332:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335:   PetscErrorCode     ierr;
336:   PetscInt           m = a->mbs,i,i2,nz,idx;
337:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

340:   its = its*lits;
341:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

347:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

349:   diag  = a->diag;
350:   idiag = a->idiag;
351:   VecGetArray(xx,&x);
352:   VecGetArray(bb,(PetscScalar**)&b);

354:   if (flag & SOR_ZERO_INITIAL_GUESS) {
355:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360:       i2     = 4;
361:       idiag += 16;
362:       for (i=1; i<m; i++) {
363:         v     = aa + 16*ai[i];
364:         vi    = aj + ai[i];
365:         nz    = diag[i] - ai[i];
366:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367:         while (nz--) {
368:           idx  = 4*(*vi++);
369:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374:           v   += 16;
375:         }
376:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380:         idiag   += 16;
381:         i2      += 4;
382:       }
383:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384:       PetscLogFlops(16*(a->nz));
385:     }
386:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388:       i2    = 0;
389:       mdiag = a->idiag+16*a->mbs;
390:       for (i=0; i<m; i++) {
391:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396:         mdiag  += 16;
397:         i2     += 4;
398:       }
399:       PetscLogFlops(28*m);
400:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
402:     }
403:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404:       idiag   = a->idiag+16*a->mbs - 16;
405:       i2      = 4*m - 4;
406:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411:       idiag -= 16;
412:       i2    -= 4;
413:       for (i=m-2; i>=0; i--) {
414:         v     = aa + 16*(diag[i]+1);
415:         vi    = aj + diag[i] + 1;
416:         nz    = ai[i+1] - diag[i] - 1;
417:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418:         while (nz--) {
419:           idx  = 4*(*vi++);
420:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425:           v   += 16;
426:         }
427:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431:         idiag   -= 16;
432:         i2      -= 4;
433:       }
434:       PetscLogFlops(16*(a->nz));
435:     }
436:   } else {
437:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438:   }
439:   VecRestoreArray(xx,&x);
440:   VecRestoreArray(bb,(PetscScalar**)&b);
441:   return(0);
442: }

446: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447: {
448:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451:   PetscErrorCode     ierr;
452:   PetscInt           m = a->mbs,i,i2,nz,idx;
453:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

456:   its = its*lits;
457:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

463:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

465:   diag  = a->diag;
466:   idiag = a->idiag;
467:   VecGetArray(xx,&x);
468:   VecGetArray(bb,(PetscScalar**)&b);

470:   if (flag & SOR_ZERO_INITIAL_GUESS) {
471:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477:       i2     = 5;
478:       idiag += 25;
479:       for (i=1; i<m; i++) {
480:         v     = aa + 25*ai[i];
481:         vi    = aj + ai[i];
482:         nz    = diag[i] - ai[i];
483:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484:         while (nz--) {
485:           idx  = 5*(*vi++);
486:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492:           v   += 25;
493:         }
494:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499:         idiag   += 25;
500:         i2      += 5;
501:       }
502:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503:       PetscLogFlops(25*(a->nz));
504:     }
505:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507:       i2    = 0;
508:       mdiag = a->idiag+25*a->mbs;
509:       for (i=0; i<m; i++) {
510:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516:         mdiag  += 25;
517:         i2     += 5;
518:       }
519:       PetscLogFlops(45*m);
520:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
522:     }
523:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524:       idiag   = a->idiag+25*a->mbs - 25;
525:       i2      = 5*m - 5;
526:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532:       idiag -= 25;
533:       i2    -= 5;
534:       for (i=m-2; i>=0; i--) {
535:         v     = aa + 25*(diag[i]+1);
536:         vi    = aj + diag[i] + 1;
537:         nz    = ai[i+1] - diag[i] - 1;
538:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539:         while (nz--) {
540:           idx  = 5*(*vi++);
541:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547:           v   += 25;
548:         }
549:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554:         idiag   -= 25;
555:         i2      -= 5;
556:       }
557:       PetscLogFlops(25*(a->nz));
558:     }
559:   } else {
560:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561:   }
562:   VecRestoreArray(xx,&x);
563:   VecRestoreArray(bb,(PetscScalar**)&b);
564:   return(0);
565: }

567: /*
568:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569: */
570: #if defined(PETSC_HAVE_FORTRAN_CAPS)
571: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573: #define matsetvaluesblocked4_ matsetvaluesblocked4
574: #endif

579: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580: {
581:   Mat               A = *AA;
582:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584:   PetscInt          *ai=a->i,*ailen=a->ilen;
585:   PetscInt          *aj=a->j,stepval,lastcol = -1;
586:   const PetscScalar *value = v;
587:   MatScalar         *ap,*aa = a->a,*bap;

590:   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591:   stepval = (n-1)*4;
592:   for (k=0; k<m; k++) { /* loop over added rows */
593:     row  = im[k];
594:     rp   = aj + ai[row];
595:     ap   = aa + 16*ai[row];
596:     nrow = ailen[row];
597:     low  = 0;
598:     high = nrow;
599:     for (l=0; l<n; l++) { /* loop over added columns */
600:       col = in[l];
601:       if (col <= lastcol) low = 0; else high = nrow;
602:       lastcol = col;
603:       value = v + k*(stepval+4 + l)*4;
604:       while (high-low > 7) {
605:         t = (low+high)/2;
606:         if (rp[t] > col) high = t;
607:         else             low  = t;
608:       }
609:       for (i=low; i<high; i++) {
610:         if (rp[i] > col) break;
611:         if (rp[i] == col) {
612:           bap  = ap +  16*i;
613:           for (ii=0; ii<4; ii++,value+=stepval) {
614:             for (jj=ii; jj<16; jj+=4) {
615:               bap[jj] += *value++;
616:             }
617:           }
618:           goto noinsert2;
619:         }
620:       }
621:       N = nrow++ - 1;
622:       high++; /* added new column index thus must search to one higher than before */
623:       /* shift up all the later entries in this row */
624:       for (ii=N; ii>=i; ii--) {
625:         rp[ii+1] = rp[ii];
626:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627:       }
628:       if (N >= i) {
629:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630:       }
631:       rp[i] = col;
632:       bap   = ap +  16*i;
633:       for (ii=0; ii<4; ii++,value+=stepval) {
634:         for (jj=ii; jj<16; jj+=4) {
635:           bap[jj] = *value++;
636:         }
637:       }
638:       noinsert2:;
639:       low = i;
640:     }
641:     ailen[row] = nrow;
642:   }
643:   PetscFunctionReturnVoid();
644: }

647: #if defined(PETSC_HAVE_FORTRAN_CAPS)
648: #define matsetvalues4_ MATSETVALUES4
649: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650: #define matsetvalues4_ matsetvalues4
651: #endif

656: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657: {
658:   Mat         A = *AA;
659:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661:   PetscInt    *ai=a->i,*ailen=a->ilen;
662:   PetscInt    *aj=a->j,brow,bcol;
663:   PetscInt    ridx,cidx,lastcol = -1;
664:   MatScalar   *ap,value,*aa=a->a,*bap;
665: 
667:   for (k=0; k<m; k++) { /* loop over added rows */
668:     row  = im[k]; brow = row/4;
669:     rp   = aj + ai[brow];
670:     ap   = aa + 16*ai[brow];
671:     nrow = ailen[brow];
672:     low  = 0;
673:     high = nrow;
674:     for (l=0; l<n; l++) { /* loop over added columns */
675:       col = in[l]; bcol = col/4;
676:       ridx = row % 4; cidx = col % 4;
677:       value = v[l + k*n];
678:       if (col <= lastcol) low = 0; else high = nrow;
679:       lastcol = col;
680:       while (high-low > 7) {
681:         t = (low+high)/2;
682:         if (rp[t] > bcol) high = t;
683:         else              low  = t;
684:       }
685:       for (i=low; i<high; i++) {
686:         if (rp[i] > bcol) break;
687:         if (rp[i] == bcol) {
688:           bap  = ap +  16*i + 4*cidx + ridx;
689:           *bap += value;
690:           goto noinsert1;
691:         }
692:       }
693:       N = nrow++ - 1;
694:       high++; /* added new column thus must search to one higher than before */
695:       /* shift up all the later entries in this row */
696:       for (ii=N; ii>=i; ii--) {
697:         rp[ii+1] = rp[ii];
698:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699:       }
700:       if (N>=i) {
701:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702:       }
703:       rp[i]                    = bcol;
704:       ap[16*i + 4*cidx + ridx] = value;
705:       noinsert1:;
706:       low = i;
707:     }
708:     ailen[brow] = nrow;
709:   }
710:   PetscFunctionReturnVoid();
711: }

714: /*  UGLY, ugly, ugly
715:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
716:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
717:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718:    converts the entries into single precision and then calls ..._MatScalar() to put them
719:    into the single precision data structures.
720: */
721: #if defined(PETSC_USE_MAT_SINGLE)
722: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723: #else
724: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725: #endif

727: #define CHUNKSIZE  10

729: /*
730:      Checks for missing diagonals
731: */
734: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735: {
736:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
738:   PetscInt       *diag,*jj = a->j,i;

741:   MatMarkDiagonal_SeqBAIJ(A);
742:   diag = a->diag;
743:   for (i=0; i<a->mbs; i++) {
744:     if (jj[diag[i]] != i) {
745:       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746:     }
747:   }
748:   return(0);
749: }

753: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754: {
755:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
757:   PetscInt       i,j,m = a->mbs;

760:   if (!a->diag) {
761:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
762:   }
763:   for (i=0; i<m; i++) {
764:     a->diag[i] = a->i[i+1];
765:     for (j=a->i[i]; j<a->i[i+1]; j++) {
766:       if (a->j[j] == i) {
767:         a->diag[i] = j;
768:         break;
769:       }
770:     }
771:   }
772:   return(0);
773: }


776: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

780: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781: {
782:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784:   PetscInt       n = a->mbs,i;

787:   *nn = n;
788:   if (!ia) return(0);
789:   if (symmetric) {
790:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
791:   } else if (oshift == 1) {
792:     /* temporarily add 1 to i and j indices */
793:     PetscInt nz = a->i[n];
794:     for (i=0; i<nz; i++) a->j[i]++;
795:     for (i=0; i<n+1; i++) a->i[i]++;
796:     *ia = a->i; *ja = a->j;
797:   } else {
798:     *ia = a->i; *ja = a->j;
799:   }

801:   return(0);
802: }

806: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807: {
808:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
810:   PetscInt       i,n = a->mbs;

813:   if (!ia) return(0);
814:   if (symmetric) {
815:     PetscFree(*ia);
816:     PetscFree(*ja);
817:   } else if (oshift == 1) {
818:     PetscInt nz = a->i[n]-1;
819:     for (i=0; i<nz; i++) a->j[i]--;
820:     for (i=0; i<n+1; i++) a->i[i]--;
821:   }
822:   return(0);
823: }

827: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828: {
829:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

833: #if defined(PETSC_USE_LOG)
834:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835: #endif
836:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
837:   if (a->row) {
838:     ISDestroy(a->row);
839:   }
840:   if (a->col) {
841:     ISDestroy(a->col);
842:   }
843:   PetscFree(a->diag);
844:   PetscFree(a->idiag);
845:   PetscFree2(a->imax,a->ilen);
846:   PetscFree(a->solve_work);
847:   PetscFree(a->mult_work);
848:   if (a->icol) {ISDestroy(a->icol);}
849:   PetscFree(a->saved_values);
850: #if defined(PETSC_USE_MAT_SINGLE)
851:   PetscFree(a->setvaluescopy);
852: #endif
853:   PetscFree(a->xtoy);
854:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

856:   PetscFree(a);

858:   PetscObjectChangeTypeName((PetscObject)A,0);
859:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
860:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
861:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
862:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
863:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
864:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
865:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
866:   return(0);
867: }

871: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872: {
873:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

877:   switch (op) {
878:   case MAT_ROW_ORIENTED:
879:     a->roworiented    = PETSC_TRUE;
880:     break;
881:   case MAT_COLUMN_ORIENTED:
882:     a->roworiented    = PETSC_FALSE;
883:     break;
884:   case MAT_COLUMNS_SORTED:
885:     a->sorted         = PETSC_TRUE;
886:     break;
887:   case MAT_COLUMNS_UNSORTED:
888:     a->sorted         = PETSC_FALSE;
889:     break;
890:   case MAT_KEEP_ZEROED_ROWS:
891:     a->keepzeroedrows = PETSC_TRUE;
892:     break;
893:   case MAT_NO_NEW_NONZERO_LOCATIONS:
894:     a->nonew          = 1;
895:     break;
896:   case MAT_NEW_NONZERO_LOCATION_ERR:
897:     a->nonew          = -1;
898:     break;
899:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900:     a->nonew          = -2;
901:     break;
902:   case MAT_YES_NEW_NONZERO_LOCATIONS:
903:     a->nonew          = 0;
904:     break;
905:   case MAT_ROWS_SORTED:
906:   case MAT_ROWS_UNSORTED:
907:   case MAT_YES_NEW_DIAGONALS:
908:   case MAT_IGNORE_OFF_PROC_ENTRIES:
909:   case MAT_USE_HASH_TABLE:
910:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
911:     break;
912:   case MAT_NO_NEW_DIAGONALS:
913:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914:   case MAT_SYMMETRIC:
915:   case MAT_STRUCTURALLY_SYMMETRIC:
916:   case MAT_NOT_SYMMETRIC:
917:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918:   case MAT_HERMITIAN:
919:   case MAT_NOT_HERMITIAN:
920:   case MAT_SYMMETRY_ETERNAL:
921:   case MAT_NOT_SYMMETRY_ETERNAL:
922:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
923:     break;
924:   default:
925:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
926:   }
927:   return(0);
928: }

932: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
933: {
934:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
936:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
937:   MatScalar      *aa,*aa_i;
938:   PetscScalar    *v_i;

941:   bs  = A->rmap.bs;
942:   ai  = a->i;
943:   aj  = a->j;
944:   aa  = a->a;
945:   bs2 = a->bs2;
946: 
947:   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
948: 
949:   bn  = row/bs;   /* Block number */
950:   bp  = row % bs; /* Block Position */
951:   M   = ai[bn+1] - ai[bn];
952:   *nz = bs*M;
953: 
954:   if (v) {
955:     *v = 0;
956:     if (*nz) {
957:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
958:       for (i=0; i<M; i++) { /* for each block in the block row */
959:         v_i  = *v + i*bs;
960:         aa_i = aa + bs2*(ai[bn] + i);
961:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
962:       }
963:     }
964:   }

966:   if (idx) {
967:     *idx = 0;
968:     if (*nz) {
969:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
970:       for (i=0; i<M; i++) { /* for each block in the block row */
971:         idx_i = *idx + i*bs;
972:         itmp  = bs*aj[ai[bn] + i];
973:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
974:       }
975:     }
976:   }
977:   return(0);
978: }

982: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983: {

987:   if (idx) {PetscFree(*idx);}
988:   if (v)   {PetscFree(*v);}
989:   return(0);
990: }

994: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
995: {
996:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
997:   Mat            C;
999:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1000:   PetscInt       *rows,*cols,bs2=a->bs2;
1001:   PetscScalar    *array;

1004:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1005:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1006:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1008: #if defined(PETSC_USE_MAT_SINGLE)
1009:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1010:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1011: #else
1012:   array = a->a;
1013: #endif

1015:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1016:   MatCreate(A->comm,&C);
1017:   MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1018:   MatSetType(C,A->type_name);
1019:   MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1020:   PetscFree(col);
1021:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1022:   cols = rows + bs;
1023:   for (i=0; i<mbs; i++) {
1024:     cols[0] = i*bs;
1025:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1026:     len = ai[i+1] - ai[i];
1027:     for (j=0; j<len; j++) {
1028:       rows[0] = (*aj++)*bs;
1029:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1030:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1031:       array += bs2;
1032:     }
1033:   }
1034:   PetscFree(rows);
1035: #if defined(PETSC_USE_MAT_SINGLE)
1036:   PetscFree(array);
1037: #endif
1038: 
1039:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1040:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1041: 
1042:   if (B) {
1043:     *B = C;
1044:   } else {
1045:     MatHeaderCopy(A,C);
1046:   }
1047:   return(0);
1048: }

1052: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1053: {
1054:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1056:   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1057:   int            fd;
1058:   PetscScalar    *aa;
1059:   FILE           *file;

1062:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1063:   PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1064:   col_lens[0] = MAT_FILE_COOKIE;

1066:   col_lens[1] = A->rmap.N;
1067:   col_lens[2] = A->cmap.n;
1068:   col_lens[3] = a->nz*bs2;

1070:   /* store lengths of each row and write (including header) to file */
1071:   count = 0;
1072:   for (i=0; i<a->mbs; i++) {
1073:     for (j=0; j<bs; j++) {
1074:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1075:     }
1076:   }
1077:   PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1078:   PetscFree(col_lens);

1080:   /* store column indices (zero start index) */
1081:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1082:   count = 0;
1083:   for (i=0; i<a->mbs; i++) {
1084:     for (j=0; j<bs; j++) {
1085:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1086:         for (l=0; l<bs; l++) {
1087:           jj[count++] = bs*a->j[k] + l;
1088:         }
1089:       }
1090:     }
1091:   }
1092:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1093:   PetscFree(jj);

1095:   /* store nonzero values */
1096:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1097:   count = 0;
1098:   for (i=0; i<a->mbs; i++) {
1099:     for (j=0; j<bs; j++) {
1100:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1101:         for (l=0; l<bs; l++) {
1102:           aa[count++] = a->a[bs2*k + l*bs + j];
1103:         }
1104:       }
1105:     }
1106:   }
1107:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1108:   PetscFree(aa);

1110:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1111:   if (file) {
1112:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1113:   }
1114:   return(0);
1115: }

1119: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1120: {
1121:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1122:   PetscErrorCode    ierr;
1123:   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1124:   PetscViewerFormat format;

1127:   PetscViewerGetFormat(viewer,&format);
1128:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1129:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1130:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1131:     Mat aij;
1132:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1133:     MatView(aij,viewer);
1134:     MatDestroy(aij);
1135:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1136:      return(0);
1137:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1138:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1139:     for (i=0; i<a->mbs; i++) {
1140:       for (j=0; j<bs; j++) {
1141:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1142:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1143:           for (l=0; l<bs; l++) {
1144: #if defined(PETSC_USE_COMPLEX)
1145:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1147:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1148:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1150:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1151:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1153:             }
1154: #else
1155:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1156:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1157:             }
1158: #endif
1159:           }
1160:         }
1161:         PetscViewerASCIIPrintf(viewer,"\n");
1162:       }
1163:     }
1164:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1165:   } else {
1166:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1167:     for (i=0; i<a->mbs; i++) {
1168:       for (j=0; j<bs; j++) {
1169:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1170:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1171:           for (l=0; l<bs; l++) {
1172: #if defined(PETSC_USE_COMPLEX)
1173:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1174:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1175:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1176:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1177:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1178:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1179:             } else {
1180:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1181:             }
1182: #else
1183:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1184: #endif
1185:           }
1186:         }
1187:         PetscViewerASCIIPrintf(viewer,"\n");
1188:       }
1189:     }
1190:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1191:   }
1192:   PetscViewerFlush(viewer);
1193:   return(0);
1194: }

1198: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1199: {
1200:   Mat            A = (Mat) Aa;
1201:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1203:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1204:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1205:   MatScalar      *aa;
1206:   PetscViewer    viewer;


1210:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1211:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1213:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1215:   /* loop over matrix elements drawing boxes */
1216:   color = PETSC_DRAW_BLUE;
1217:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1218:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1219:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1220:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1221:       aa = a->a + j*bs2;
1222:       for (k=0; k<bs; k++) {
1223:         for (l=0; l<bs; l++) {
1224:           if (PetscRealPart(*aa++) >=  0.) continue;
1225:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1226:         }
1227:       }
1228:     }
1229:   }
1230:   color = PETSC_DRAW_CYAN;
1231:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1232:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1233:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1234:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1235:       aa = a->a + j*bs2;
1236:       for (k=0; k<bs; k++) {
1237:         for (l=0; l<bs; l++) {
1238:           if (PetscRealPart(*aa++) != 0.) continue;
1239:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1240:         }
1241:       }
1242:     }
1243:   }

1245:   color = PETSC_DRAW_RED;
1246:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1247:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1248:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250:       aa = a->a + j*bs2;
1251:       for (k=0; k<bs; k++) {
1252:         for (l=0; l<bs; l++) {
1253:           if (PetscRealPart(*aa++) <= 0.) continue;
1254:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1255:         }
1256:       }
1257:     }
1258:   }
1259:   return(0);
1260: }

1264: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1265: {
1267:   PetscReal      xl,yl,xr,yr,w,h;
1268:   PetscDraw      draw;
1269:   PetscTruth     isnull;


1273:   PetscViewerDrawGetDraw(viewer,0,&draw);
1274:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1276:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1277:   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1278:   xr += w;    yr += h;  xl = -w;     yl = -h;
1279:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1280:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1281:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1282:   return(0);
1283: }

1287: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1288: {
1290:   PetscTruth     iascii,isbinary,isdraw;

1293:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1294:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1295:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1296:   if (iascii){
1297:     MatView_SeqBAIJ_ASCII(A,viewer);
1298:   } else if (isbinary) {
1299:     MatView_SeqBAIJ_Binary(A,viewer);
1300:   } else if (isdraw) {
1301:     MatView_SeqBAIJ_Draw(A,viewer);
1302:   } else {
1303:     Mat B;
1304:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1305:     MatView(B,viewer);
1306:     MatDestroy(B);
1307:   }
1308:   return(0);
1309: }


1314: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1315: {
1316:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1318:   PetscInt    *ai = a->i,*ailen = a->ilen;
1319:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1320:   MatScalar   *ap,*aa = a->a,zero = 0.0;

1323:   for (k=0; k<m; k++) { /* loop over rows */
1324:     row  = im[k]; brow = row/bs;
1325:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1326:     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1327:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1328:     nrow = ailen[brow];
1329:     for (l=0; l<n; l++) { /* loop over columns */
1330:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1331:       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1332:       col  = in[l] ;
1333:       bcol = col/bs;
1334:       cidx = col%bs;
1335:       ridx = row%bs;
1336:       high = nrow;
1337:       low  = 0; /* assume unsorted */
1338:       while (high-low > 5) {
1339:         t = (low+high)/2;
1340:         if (rp[t] > bcol) high = t;
1341:         else             low  = t;
1342:       }
1343:       for (i=low; i<high; i++) {
1344:         if (rp[i] > bcol) break;
1345:         if (rp[i] == bcol) {
1346:           *v++ = ap[bs2*i+bs*cidx+ridx];
1347:           goto finished;
1348:         }
1349:       }
1350:       *v++ = zero;
1351:       finished:;
1352:     }
1353:   }
1354:   return(0);
1355: }

1357: #if defined(PETSC_USE_MAT_SINGLE)
1360: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1361: {
1362:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1364:   PetscInt       i,N = m*n*b->bs2;
1365:   MatScalar      *vsingle;

1368:   if (N > b->setvalueslen) {
1369:     PetscFree(b->setvaluescopy);
1370:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1371:     b->setvalueslen  = N;
1372:   }
1373:   vsingle = b->setvaluescopy;
1374:   for (i=0; i<N; i++) {
1375:     vsingle[i] = v[i];
1376:   }
1377:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1378:   return(0);
1379: }
1380: #endif


1385: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1386: {
1387:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1388:   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1389:   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1390:   PetscErrorCode  ierr;
1391:   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1392:   PetscTruth      roworiented=a->roworiented;
1393:   const MatScalar *value = v;
1394:   MatScalar       *ap,*aa = a->a,*bap;

1397:   if (roworiented) {
1398:     stepval = (n-1)*bs;
1399:   } else {
1400:     stepval = (m-1)*bs;
1401:   }
1402:   for (k=0; k<m; k++) { /* loop over added rows */
1403:     row  = im[k];
1404:     if (row < 0) continue;
1405: #if defined(PETSC_USE_DEBUG)  
1406:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1407: #endif
1408:     rp   = aj + ai[row];
1409:     ap   = aa + bs2*ai[row];
1410:     rmax = imax[row];
1411:     nrow = ailen[row];
1412:     low  = 0;
1413:     high = nrow;
1414:     for (l=0; l<n; l++) { /* loop over added columns */
1415:       if (in[l] < 0) continue;
1416: #if defined(PETSC_USE_DEBUG)  
1417:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1418: #endif
1419:       col = in[l];
1420:       if (roworiented) {
1421:         value = v + k*(stepval+bs)*bs + l*bs;
1422:       } else {
1423:         value = v + l*(stepval+bs)*bs + k*bs;
1424:       }
1425:       if (col <= lastcol) low = 0; else high = nrow;
1426:       lastcol = col;
1427:       while (high-low > 7) {
1428:         t = (low+high)/2;
1429:         if (rp[t] > col) high = t;
1430:         else             low  = t;
1431:       }
1432:       for (i=low; i<high; i++) {
1433:         if (rp[i] > col) break;
1434:         if (rp[i] == col) {
1435:           bap  = ap +  bs2*i;
1436:           if (roworiented) {
1437:             if (is == ADD_VALUES) {
1438:               for (ii=0; ii<bs; ii++,value+=stepval) {
1439:                 for (jj=ii; jj<bs2; jj+=bs) {
1440:                   bap[jj] += *value++;
1441:                 }
1442:               }
1443:             } else {
1444:               for (ii=0; ii<bs; ii++,value+=stepval) {
1445:                 for (jj=ii; jj<bs2; jj+=bs) {
1446:                   bap[jj] = *value++;
1447:                 }
1448:               }
1449:             }
1450:           } else {
1451:             if (is == ADD_VALUES) {
1452:               for (ii=0; ii<bs; ii++,value+=stepval) {
1453:                 for (jj=0; jj<bs; jj++) {
1454:                   *bap++ += *value++;
1455:                 }
1456:               }
1457:             } else {
1458:               for (ii=0; ii<bs; ii++,value+=stepval) {
1459:                 for (jj=0; jj<bs; jj++) {
1460:                   *bap++  = *value++;
1461:                 }
1462:               }
1463:             }
1464:           }
1465:           goto noinsert2;
1466:         }
1467:       }
1468:       if (nonew == 1) goto noinsert2;
1469:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1470:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1471:       N = nrow++ - 1; high++;
1472:       /* shift up all the later entries in this row */
1473:       for (ii=N; ii>=i; ii--) {
1474:         rp[ii+1] = rp[ii];
1475:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1476:       }
1477:       if (N >= i) {
1478:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1479:       }
1480:       rp[i] = col;
1481:       bap   = ap +  bs2*i;
1482:       if (roworiented) {
1483:         for (ii=0; ii<bs; ii++,value+=stepval) {
1484:           for (jj=ii; jj<bs2; jj+=bs) {
1485:             bap[jj] = *value++;
1486:           }
1487:         }
1488:       } else {
1489:         for (ii=0; ii<bs; ii++,value+=stepval) {
1490:           for (jj=0; jj<bs; jj++) {
1491:             *bap++  = *value++;
1492:           }
1493:         }
1494:       }
1495:       noinsert2:;
1496:       low = i;
1497:     }
1498:     ailen[row] = nrow;
1499:   }
1500:   return(0);
1501: }

1505: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506: {
1507:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509:   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1511:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512:   MatScalar      *aa = a->a,*ap;
1513:   PetscReal      ratio=0.6;

1516:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1518:   if (m) rmax = ailen[0];
1519:   for (i=1; i<mbs; i++) {
1520:     /* move each row back by the amount of empty slots (fshift) before it*/
1521:     fshift += imax[i-1] - ailen[i-1];
1522:     rmax   = PetscMax(rmax,ailen[i]);
1523:     if (fshift) {
1524:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525:       N = ailen[i];
1526:       for (j=0; j<N; j++) {
1527:         ip[j-fshift] = ip[j];
1528:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1529:       }
1530:     }
1531:     ai[i] = ai[i-1] + ailen[i-1];
1532:   }
1533:   if (mbs) {
1534:     fshift += imax[mbs-1] - ailen[mbs-1];
1535:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536:   }
1537:   /* reset ilen and imax for each row */
1538:   for (i=0; i<mbs; i++) {
1539:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540:   }
1541:   a->nz = ai[mbs];

1543:   /* diagonals may have moved, so kill the diagonal pointers */
1544:   a->idiagvalid = PETSC_FALSE;
1545:   if (fshift && a->diag) {
1546:     PetscFree(a->diag);
1547:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1548:     a->diag = 0;
1549:   }
1550:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1551:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1552:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1553:   a->reallocs          = 0;
1554:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1556:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557:   if (a->compressedrow.use){
1558:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1559:   }

1561:   A->same_nonzero = PETSC_TRUE;
1562:   return(0);
1563: }

1565: /* 
1566:    This function returns an array of flags which indicate the locations of contiguous
1567:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569:    Assume: sizes should be long enough to hold all the values.
1570: */
1573: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574: {
1575:   PetscInt   i,j,k,row;
1576:   PetscTruth flg;

1579:   for (i=0,j=0; i<n; j++) {
1580:     row = idx[i];
1581:     if (row%bs!=0) { /* Not the begining of a block */
1582:       sizes[j] = 1;
1583:       i++;
1584:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586:       i++;
1587:     } else { /* Begining of the block, so check if the complete block exists */
1588:       flg = PETSC_TRUE;
1589:       for (k=1; k<bs; k++) {
1590:         if (row+k != idx[i+k]) { /* break in the block */
1591:           flg = PETSC_FALSE;
1592:           break;
1593:         }
1594:       }
1595:       if (flg) { /* No break in the bs */
1596:         sizes[j] = bs;
1597:         i+= bs;
1598:       } else {
1599:         sizes[j] = 1;
1600:         i++;
1601:       }
1602:     }
1603:   }
1604:   *bs_max = j;
1605:   return(0);
1606: }
1607: 
1610: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1611: {
1612:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1614:   PetscInt       i,j,k,count,*rows;
1615:   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1616:   PetscScalar    zero = 0.0;
1617:   MatScalar      *aa;

1620:   /* Make a copy of the IS and  sort it */
1621:   /* allocate memory for rows,sizes */
1622:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1623:   sizes = rows + is_n;

1625:   /* copy IS values to rows, and sort them */
1626:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1627:   PetscSortInt(is_n,rows);
1628:   if (baij->keepzeroedrows) {
1629:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1630:     bs_max = is_n;
1631:     A->same_nonzero = PETSC_TRUE;
1632:   } else {
1633:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1634:     A->same_nonzero = PETSC_FALSE;
1635:   }

1637:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638:     row   = rows[j];
1639:     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643:       if (diag != 0.0) {
1644:         if (baij->ilen[row/bs] > 0) {
1645:           baij->ilen[row/bs]       = 1;
1646:           baij->j[baij->i[row/bs]] = row/bs;
1647:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1648:         }
1649:         /* Now insert all the diagonal values for this bs */
1650:         for (k=0; k<bs; k++) {
1651:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1652:         }
1653:       } else { /* (diag == 0.0) */
1654:         baij->ilen[row/bs] = 0;
1655:       } /* end (diag == 0.0) */
1656:     } else { /* (sizes[i] != bs) */
1657: #if defined (PETSC_USE_DEBUG)
1658:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659: #endif
1660:       for (k=0; k<count; k++) {
1661:         aa[0] =  zero;
1662:         aa    += bs;
1663:       }
1664:       if (diag != 0.0) {
1665:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1666:       }
1667:     }
1668:   }

1670:   PetscFree(rows);
1671:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1672:   return(0);
1673: }

1677: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678: {
1679:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1680:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1684:   PetscInt       ridx,cidx,bs2=a->bs2;
1685:   PetscTruth     roworiented=a->roworiented;
1686:   MatScalar      *ap,value,*aa=a->a,*bap;

1689:   for (k=0; k<m; k++) { /* loop over added rows */
1690:     row  = im[k];
1691:     brow = row/bs;
1692:     if (row < 0) continue;
1693: #if defined(PETSC_USE_DEBUG)  
1694:     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1695: #endif
1696:     rp   = aj + ai[brow];
1697:     ap   = aa + bs2*ai[brow];
1698:     rmax = imax[brow];
1699:     nrow = ailen[brow];
1700:     low  = 0;
1701:     high = nrow;
1702:     for (l=0; l<n; l++) { /* loop over added columns */
1703:       if (in[l] < 0) continue;
1704: #if defined(PETSC_USE_DEBUG)  
1705:       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1706: #endif
1707:       col = in[l]; bcol = col/bs;
1708:       ridx = row % bs; cidx = col % bs;
1709:       if (roworiented) {
1710:         value = v[l + k*n];
1711:       } else {
1712:         value = v[k + l*m];
1713:       }
1714:       if (col <= lastcol) low = 0; else high = nrow;
1715:       lastcol = col;
1716:       while (high-low > 7) {
1717:         t = (low+high)/2;
1718:         if (rp[t] > bcol) high = t;
1719:         else              low  = t;
1720:       }
1721:       for (i=low; i<high; i++) {
1722:         if (rp[i] > bcol) break;
1723:         if (rp[i] == bcol) {
1724:           bap  = ap +  bs2*i + bs*cidx + ridx;
1725:           if (is == ADD_VALUES) *bap += value;
1726:           else                  *bap  = value;
1727:           goto noinsert1;
1728:         }
1729:       }
1730:       if (nonew == 1) goto noinsert1;
1731:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1732:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1733:       N = nrow++ - 1; high++;
1734:       /* shift up all the later entries in this row */
1735:       for (ii=N; ii>=i; ii--) {
1736:         rp[ii+1] = rp[ii];
1737:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1738:       }
1739:       if (N>=i) {
1740:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1741:       }
1742:       rp[i]                      = bcol;
1743:       ap[bs2*i + bs*cidx + ridx] = value;
1744:       a->nz++;
1745:       noinsert1:;
1746:       low = i;
1747:     }
1748:     ailen[brow] = nrow;
1749:   }
1750:   A->same_nonzero = PETSC_FALSE;
1751:   return(0);
1752: }


1757: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1758: {
1759:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1760:   Mat            outA;
1762:   PetscTruth     row_identity,col_identity;

1765:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1766:   ISIdentity(row,&row_identity);
1767:   ISIdentity(col,&col_identity);
1768:   if (!row_identity || !col_identity) {
1769:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1770:   }

1772:   outA          = inA;
1773:   inA->factor   = FACTOR_LU;

1775:   MatMarkDiagonal_SeqBAIJ(inA);

1777:   a->row        = row;
1778:   a->col        = col;
1779:   PetscObjectReference((PetscObject)row);
1780:   PetscObjectReference((PetscObject)col);
1781: 
1782:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1783:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1784:   PetscLogObjectParent(inA,a->icol);
1785: 
1786:   /*
1787:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1788:       for ILU(0) factorization with natural ordering
1789:   */
1790:   if (inA->rmap.bs < 8) {
1791:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1792:   } else {
1793:     if (!a->solve_work) {
1794:       PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
1795:       PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
1796:     }
1797:   }

1799:   MatLUFactorNumeric(inA,info,&outA);

1801:   return(0);
1802: }

1807: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1808: {
1809:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1810:   PetscInt    i,nz,nbs;

1813:   nz  = baij->maxnz/baij->bs2;
1814:   nbs = baij->nbs;
1815:   for (i=0; i<nz; i++) {
1816:     baij->j[i] = indices[i];
1817:   }
1818:   baij->nz = nz;
1819:   for (i=0; i<nbs; i++) {
1820:     baij->ilen[i] = baij->imax[i];
1821:   }

1823:   return(0);
1824: }

1829: /*@
1830:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1831:        in the matrix.

1833:   Input Parameters:
1834: +  mat - the SeqBAIJ matrix
1835: -  indices - the column indices

1837:   Level: advanced

1839:   Notes:
1840:     This can be called if you have precomputed the nonzero structure of the 
1841:   matrix and want to provide it to the matrix object to improve the performance
1842:   of the MatSetValues() operation.

1844:     You MUST have set the correct numbers of nonzeros per row in the call to 
1845:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

1847:     MUST be called before any calls to MatSetValues();

1849: @*/
1850: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1851: {
1852:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

1857:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1858:   if (f) {
1859:     (*f)(mat,indices);
1860:   } else {
1861:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1862:   }
1863:   return(0);
1864: }

1868: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1869: {
1870:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1872:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1873:   PetscReal      atmp;
1874:   PetscScalar    *x,zero = 0.0;
1875:   MatScalar      *aa;
1876:   PetscInt       ncols,brow,krow,kcol;

1879:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1880:   bs   = A->rmap.bs;
1881:   aa   = a->a;
1882:   ai   = a->i;
1883:   aj   = a->j;
1884:   mbs = a->mbs;

1886:   VecSet(v,zero);
1887:   VecGetArray(v,&x);
1888:   VecGetLocalSize(v,&n);
1889:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1890:   for (i=0; i<mbs; i++) {
1891:     ncols = ai[1] - ai[0]; ai++;
1892:     brow  = bs*i;
1893:     for (j=0; j<ncols; j++){
1894:       /* bcol = bs*(*aj); */
1895:       for (kcol=0; kcol<bs; kcol++){
1896:         for (krow=0; krow<bs; krow++){
1897:           atmp = PetscAbsScalar(*aa); aa++;
1898:           row = brow + krow;    /* row index */
1899:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1900:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1901:         }
1902:       }
1903:       aj++;
1904:     }
1905:   }
1906:   VecRestoreArray(v,&x);
1907:   return(0);
1908: }

1912: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1913: {

1917:   /* If the two matrices have the same copy implementation, use fast copy. */
1918:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1919:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1920:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

1922:     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1923:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1924:     }
1925:     PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
1926:   } else {
1927:     MatCopy_Basic(A,B,str);
1928:   }
1929:   return(0);
1930: }

1934: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1935: {

1939:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
1940:   return(0);
1941: }

1945: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1946: {
1947:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1949:   *array = a->a;
1950:   return(0);
1951: }

1955: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1956: {
1958:   return(0);
1959: }

1961:  #include petscblaslapack.h
1964: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1965: {
1966:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1968:   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1969:   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;

1972:   if (str == SAME_NONZERO_PATTERN) {
1973:     PetscScalar alpha = a;
1974:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1975:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1976:     if (y->xtoy && y->XtoY != X) {
1977:       PetscFree(y->xtoy);
1978:       MatDestroy(y->XtoY);
1979:     }
1980:     if (!y->xtoy) { /* get xtoy */
1981:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1982:       y->XtoY = X;
1983:     }
1984:     bs2 = bs*bs;
1985:     for (i=0; i<x->nz; i++) {
1986:       j = 0;
1987:       while (j < bs2){
1988:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1989:         j++;
1990:       }
1991:     }
1992:     PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1993:   } else {
1994:     MatAXPY_Basic(Y,a,X,str);
1995:   }
1996:   return(0);
1997: }

2001: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2002: {
2003:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2004:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2005:   PetscScalar    *aa = a->a;

2008:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2009:   return(0);
2010: }

2014: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2015: {
2016:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2017:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2018:   PetscScalar    *aa = a->a;

2021:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2022:   return(0);
2023: }


2026: /* -------------------------------------------------------------------*/
2027: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2028:        MatGetRow_SeqBAIJ,
2029:        MatRestoreRow_SeqBAIJ,
2030:        MatMult_SeqBAIJ_N,
2031: /* 4*/ MatMultAdd_SeqBAIJ_N,
2032:        MatMultTranspose_SeqBAIJ,
2033:        MatMultTransposeAdd_SeqBAIJ,
2034:        MatSolve_SeqBAIJ_N,
2035:        0,
2036:        0,
2037: /*10*/ 0,
2038:        MatLUFactor_SeqBAIJ,
2039:        0,
2040:        0,
2041:        MatTranspose_SeqBAIJ,
2042: /*15*/ MatGetInfo_SeqBAIJ,
2043:        MatEqual_SeqBAIJ,
2044:        MatGetDiagonal_SeqBAIJ,
2045:        MatDiagonalScale_SeqBAIJ,
2046:        MatNorm_SeqBAIJ,
2047: /*20*/ 0,
2048:        MatAssemblyEnd_SeqBAIJ,
2049:        0,
2050:        MatSetOption_SeqBAIJ,
2051:        MatZeroEntries_SeqBAIJ,
2052: /*25*/ MatZeroRows_SeqBAIJ,
2053:        MatLUFactorSymbolic_SeqBAIJ,
2054:        MatLUFactorNumeric_SeqBAIJ_N,
2055:        MatCholeskyFactorSymbolic_SeqBAIJ,
2056:        MatCholeskyFactorNumeric_SeqBAIJ_N,
2057: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2058:        MatILUFactorSymbolic_SeqBAIJ,
2059:        MatICCFactorSymbolic_SeqBAIJ,
2060:        MatGetArray_SeqBAIJ,
2061:        MatRestoreArray_SeqBAIJ,
2062: /*35*/ MatDuplicate_SeqBAIJ,
2063:        0,
2064:        0,
2065:        MatILUFactor_SeqBAIJ,
2066:        0,
2067: /*40*/ MatAXPY_SeqBAIJ,
2068:        MatGetSubMatrices_SeqBAIJ,
2069:        MatIncreaseOverlap_SeqBAIJ,
2070:        MatGetValues_SeqBAIJ,
2071:        MatCopy_SeqBAIJ,
2072: /*45*/ 0,
2073:        MatScale_SeqBAIJ,
2074:        0,
2075:        0,
2076:        0,
2077: /*50*/ 0,
2078:        MatGetRowIJ_SeqBAIJ,
2079:        MatRestoreRowIJ_SeqBAIJ,
2080:        0,
2081:        0,
2082: /*55*/ 0,
2083:        0,
2084:        0,
2085:        0,
2086:        MatSetValuesBlocked_SeqBAIJ,
2087: /*60*/ MatGetSubMatrix_SeqBAIJ,
2088:        MatDestroy_SeqBAIJ,
2089:        MatView_SeqBAIJ,
2090:        0,
2091:        0,
2092: /*65*/ 0,
2093:        0,
2094:        0,
2095:        0,
2096:        0,
2097: /*70*/ MatGetRowMax_SeqBAIJ,
2098:        MatConvert_Basic,
2099:        0,
2100:        0,
2101:        0,
2102: /*75*/ 0,
2103:        0,
2104:        0,
2105:        0,
2106:        0,
2107: /*80*/ 0,
2108:        0,
2109:        0,
2110:        0,
2111:        MatLoad_SeqBAIJ,
2112: /*85*/ 0,
2113:        0,
2114:        0,
2115:        0,
2116:        0,
2117: /*90*/ 0,
2118:        0,
2119:        0,
2120:        0,
2121:        0,
2122: /*95*/ 0,
2123:        0,
2124:        0,
2125:        0,
2126:        0,
2127: /*100*/0,
2128:        0,
2129:        0,
2130:        0,
2131:        0,
2132: /*105*/0,
2133:        MatRealPart_SeqBAIJ,
2134:        MatImaginaryPart_SeqBAIJ
2135: };

2140: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2141: {
2142:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2143:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2147:   if (aij->nonew != 1) {
2148:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2149:   }

2151:   /* allocate space for values if not already there */
2152:   if (!aij->saved_values) {
2153:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2154:   }

2156:   /* copy values over */
2157:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2158:   return(0);
2159: }

2165: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2166: {
2167:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2169:   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;

2172:   if (aij->nonew != 1) {
2173:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2174:   }
2175:   if (!aij->saved_values) {
2176:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2177:   }

2179:   /* copy values over */
2180:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2181:   return(0);
2182: }


2193: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2194: {
2195:   Mat_SeqBAIJ    *b;
2197:   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2198:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2202:   if (nz == MAT_SKIP_ALLOCATION) {
2203:     skipallocation = PETSC_TRUE;
2204:     nz             = 0;
2205:   }

2207:   PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2208:     PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2209:   PetscOptionsEnd();

2211:   if (nnz && newbs != bs) {
2212:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2213:   }
2214:   bs   = newbs;

2216:   B->rmap.bs = B->cmap.bs = bs;
2217:   PetscMapInitialize(B->comm,&B->rmap);
2218:   PetscMapInitialize(B->comm,&B->cmap);

2220:   B->preallocated = PETSC_TRUE;

2222:   mbs  = B->rmap.n/bs;
2223:   nbs  = B->cmap.n/bs;
2224:   bs2  = bs*bs;

2226:   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2227:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2228:   }

2230:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2231:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2232:   if (nnz) {
2233:     for (i=0; i<mbs; i++) {
2234:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2235:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2236:     }
2237:   }

2239:   b       = (Mat_SeqBAIJ*)B->data;
2240:   PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2241:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2242:   PetscOptionsEnd();

2244:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2245:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2246:   if (!flg) {
2247:     switch (bs) {
2248:     case 1:
2249:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2250:       B->ops->mult            = MatMult_SeqBAIJ_1;
2251:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2252:       break;
2253:     case 2:
2254:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2255:       B->ops->mult            = MatMult_SeqBAIJ_2;
2256:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2257:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2258:       break;
2259:     case 3:
2260:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2261:       B->ops->mult            = MatMult_SeqBAIJ_3;
2262:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2263:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2264:       break;
2265:     case 4:
2266:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2267:       B->ops->mult            = MatMult_SeqBAIJ_4;
2268:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2269:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2270:       break;
2271:     case 5:
2272:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2273:       B->ops->mult            = MatMult_SeqBAIJ_5;
2274:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2275:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2276:       break;
2277:     case 6:
2278:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2279:       B->ops->mult            = MatMult_SeqBAIJ_6;
2280:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2281:       break;
2282:     case 7:
2283:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2284:       B->ops->mult            = MatMult_SeqBAIJ_7;
2285:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2286:       break;
2287:     default:
2288:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2289:       B->ops->mult            = MatMult_SeqBAIJ_N;
2290:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2291:       break;
2292:     }
2293:   }
2294:   B->rmap.bs      = bs;
2295:   b->mbs     = mbs;
2296:   b->nbs     = nbs;
2297:   if (!skipallocation) {
2298:     PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2299:     /* b->ilen will count nonzeros in each block row so far. */
2300:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2301:     if (!nnz) {
2302:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2303:       else if (nz <= 0)        nz = 1;
2304:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2305:       nz = nz*mbs;
2306:     } else {
2307:       nz = 0;
2308:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2309:     }

2311:     /* allocate the matrix space */
2312:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2313:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2314:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2315:     b->singlemalloc = PETSC_TRUE;

2317:     b->i[0] = 0;
2318:     for (i=1; i<mbs+1; i++) {
2319:       b->i[i] = b->i[i-1] + b->imax[i-1];
2320:     }
2321:     b->free_a     = PETSC_TRUE;
2322:     b->free_ij    = PETSC_TRUE;
2323:   } else {
2324:     b->free_a     = PETSC_FALSE;
2325:     b->free_ij    = PETSC_FALSE;
2326:   }

2328:   B->rmap.bs          = bs;
2329:   b->bs2              = bs2;
2330:   b->mbs              = mbs;
2331:   b->nz               = 0;
2332:   b->maxnz            = nz*bs2;
2333:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2334:   return(0);
2335: }

2338: /*MC
2339:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2340:    block sparse compressed row format.

2342:    Options Database Keys:
2343: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2345:   Level: beginner

2347: .seealso: MatCreateSeqBAIJ()
2348: M*/

2353: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
2354: {
2356:   PetscMPIInt    size;
2357:   Mat_SeqBAIJ    *b;

2360:   MPI_Comm_size(B->comm,&size);
2361:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2363:   PetscNew(Mat_SeqBAIJ,&b);
2364:   B->data = (void*)b;
2365:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2366:   B->factor           = 0;
2367:   B->mapping          = 0;
2368:   b->row              = 0;
2369:   b->col              = 0;
2370:   b->icol             = 0;
2371:   b->reallocs         = 0;
2372:   b->saved_values     = 0;
2373: #if defined(PETSC_USE_MAT_SINGLE)
2374:   b->setvalueslen     = 0;
2375:   b->setvaluescopy    = PETSC_NULL;
2376: #endif

2378:   b->sorted           = PETSC_FALSE;
2379:   b->roworiented      = PETSC_TRUE;
2380:   b->nonew            = 0;
2381:   b->diag             = 0;
2382:   b->solve_work       = 0;
2383:   b->mult_work        = 0;
2384:   B->spptr            = 0;
2385:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2386:   b->keepzeroedrows   = PETSC_FALSE;
2387:   b->xtoy              = 0;
2388:   b->XtoY              = 0;
2389:   b->compressedrow.use     = PETSC_FALSE;
2390:   b->compressedrow.nrows   = 0;
2391:   b->compressedrow.i       = PETSC_NULL;
2392:   b->compressedrow.rindex  = PETSC_NULL;
2393:   b->compressedrow.checked = PETSC_FALSE;
2394:   B->same_nonzero          = PETSC_FALSE;

2396:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2397:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2398:                                       MatInvertBlockDiagonal_SeqBAIJ);
2399:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2400:                                      "MatStoreValues_SeqBAIJ",
2401:                                       MatStoreValues_SeqBAIJ);
2402:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2403:                                      "MatRetrieveValues_SeqBAIJ",
2404:                                       MatRetrieveValues_SeqBAIJ);
2405:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2406:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2407:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2408:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2409:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2410:                                       MatConvert_SeqBAIJ_SeqAIJ);
2411:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2412:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2413:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2414:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2415:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2416:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2417:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2418:   return(0);
2419: }

2424: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2425: {
2426:   Mat            C;
2427:   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2429:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2432:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2434:   *B = 0;
2435:   MatCreate(A->comm,&C);
2436:   MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2437:   MatSetType(C,A->type_name);
2438:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2439:   c    = (Mat_SeqBAIJ*)C->data;

2441:   C->rmap.N   = A->rmap.N;
2442:   C->cmap.N   = A->cmap.N;
2443:   C->rmap.bs  = A->rmap.bs;
2444:   c->bs2 = a->bs2;
2445:   c->mbs = a->mbs;
2446:   c->nbs = a->nbs;

2448:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2449:   for (i=0; i<mbs; i++) {
2450:     c->imax[i] = a->imax[i];
2451:     c->ilen[i] = a->ilen[i];
2452:   }

2454:   /* allocate the matrix space */
2455:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2456:   c->singlemalloc = PETSC_TRUE;
2457:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2458:   if (mbs > 0) {
2459:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2460:     if (cpvalues == MAT_COPY_VALUES) {
2461:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2462:     } else {
2463:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2464:     }
2465:   }
2466:   c->sorted      = a->sorted;
2467:   c->roworiented = a->roworiented;
2468:   c->nonew       = a->nonew;

2470:   if (a->diag) {
2471:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2472:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2473:     for (i=0; i<mbs; i++) {
2474:       c->diag[i] = a->diag[i];
2475:     }
2476:   } else c->diag        = 0;
2477:   c->nz                 = a->nz;
2478:   c->maxnz              = a->maxnz;
2479:   c->solve_work         = 0;
2480:   c->mult_work          = 0;
2481:   c->free_a             = PETSC_TRUE;
2482:   c->free_ij            = PETSC_TRUE;
2483:   C->preallocated       = PETSC_TRUE;
2484:   C->assembled          = PETSC_TRUE;

2486:   c->compressedrow.use     = a->compressedrow.use;
2487:   c->compressedrow.nrows   = a->compressedrow.nrows;
2488:   c->compressedrow.checked = a->compressedrow.checked;
2489:   if ( a->compressedrow.checked && a->compressedrow.use){
2490:     i = a->compressedrow.nrows;
2491:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2492:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2493:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2494:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2495:   } else {
2496:     c->compressedrow.use    = PETSC_FALSE;
2497:     c->compressedrow.i      = PETSC_NULL;
2498:     c->compressedrow.rindex = PETSC_NULL;
2499:   }
2500:   C->same_nonzero = A->same_nonzero;
2501:   *B = C;
2502:   PetscFListDuplicate(A->qlist,&C->qlist);
2503:   return(0);
2504: }

2508: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2509: {
2510:   Mat_SeqBAIJ    *a;
2511:   Mat            B;
2513:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2514:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2515:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2516:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2517:   PetscMPIInt    size;
2518:   int            fd;
2519:   PetscScalar    *aa;
2520:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2523:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2524:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2525:   PetscOptionsEnd();
2526:   bs2  = bs*bs;

2528:   MPI_Comm_size(comm,&size);
2529:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2530:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2531:   PetscBinaryRead(fd,header,4,PETSC_INT);
2532:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2533:   M = header[1]; N = header[2]; nz = header[3];

2535:   if (header[3] < 0) {
2536:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2537:   }

2539:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2541:   /* 
2542:      This code adds extra rows to make sure the number of rows is 
2543:     divisible by the blocksize
2544:   */
2545:   mbs        = M/bs;
2546:   extra_rows = bs - M + bs*(mbs);
2547:   if (extra_rows == bs) extra_rows = 0;
2548:   else                  mbs++;
2549:   if (extra_rows) {
2550:     PetscInfo(0,"Padding loaded matrix to match blocksize\n");
2551:   }

2553:   /* read in row lengths */
2554:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2555:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2556:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2558:   /* read in column indices */
2559:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2560:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2561:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2563:   /* loop over row lengths determining block row lengths */
2564:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2565:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2566:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2567:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2568:   masked   = mask + mbs;
2569:   rowcount = 0; nzcount = 0;
2570:   for (i=0; i<mbs; i++) {
2571:     nmask = 0;
2572:     for (j=0; j<bs; j++) {
2573:       kmax = rowlengths[rowcount];
2574:       for (k=0; k<kmax; k++) {
2575:         tmp = jj[nzcount++]/bs;
2576:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2577:       }
2578:       rowcount++;
2579:     }
2580:     browlengths[i] += nmask;
2581:     /* zero out the mask elements we set */
2582:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2583:   }

2585:   /* create our matrix */
2586:   MatCreate(comm,&B);
2587:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2588:   MatSetType(B,type);
2589:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2590:   a = (Mat_SeqBAIJ*)B->data;

2592:   /* set matrix "i" values */
2593:   a->i[0] = 0;
2594:   for (i=1; i<= mbs; i++) {
2595:     a->i[i]      = a->i[i-1] + browlengths[i-1];
2596:     a->ilen[i-1] = browlengths[i-1];
2597:   }
2598:   a->nz         = 0;
2599:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

2601:   /* read in nonzero values */
2602:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2603:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2604:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

2606:   /* set "a" and "j" values into matrix */
2607:   nzcount = 0; jcount = 0;
2608:   for (i=0; i<mbs; i++) {
2609:     nzcountb = nzcount;
2610:     nmask    = 0;
2611:     for (j=0; j<bs; j++) {
2612:       kmax = rowlengths[i*bs+j];
2613:       for (k=0; k<kmax; k++) {
2614:         tmp = jj[nzcount++]/bs;
2615:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2616:       }
2617:     }
2618:     /* sort the masked values */
2619:     PetscSortInt(nmask,masked);

2621:     /* set "j" values into matrix */
2622:     maskcount = 1;
2623:     for (j=0; j<nmask; j++) {
2624:       a->j[jcount++]  = masked[j];
2625:       mask[masked[j]] = maskcount++;
2626:     }
2627:     /* set "a" values into matrix */
2628:     ishift = bs2*a->i[i];
2629:     for (j=0; j<bs; j++) {
2630:       kmax = rowlengths[i*bs+j];
2631:       for (k=0; k<kmax; k++) {
2632:         tmp       = jj[nzcountb]/bs ;
2633:         block     = mask[tmp] - 1;
2634:         point     = jj[nzcountb] - bs*tmp;
2635:         idx       = ishift + bs2*block + j + bs*point;
2636:         a->a[idx] = (MatScalar)aa[nzcountb++];
2637:       }
2638:     }
2639:     /* zero out the mask elements we set */
2640:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2641:   }
2642:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2644:   PetscFree(rowlengths);
2645:   PetscFree(browlengths);
2646:   PetscFree(aa);
2647:   PetscFree(jj);
2648:   PetscFree(mask);

2650:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2651:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2652:   MatView_Private(B);

2654:   *A = B;
2655:   return(0);
2656: }

2660: /*@C
2661:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2662:    compressed row) format.  For good matrix assembly performance the
2663:    user should preallocate the matrix storage by setting the parameter nz
2664:    (or the array nnz).  By setting these parameters accurately, performance
2665:    during matrix assembly can be increased by more than a factor of 50.

2667:    Collective on MPI_Comm

2669:    Input Parameters:
2670: +  comm - MPI communicator, set to PETSC_COMM_SELF
2671: .  bs - size of block
2672: .  m - number of rows
2673: .  n - number of columns
2674: .  nz - number of nonzero blocks  per block row (same for all rows)
2675: -  nnz - array containing the number of nonzero blocks in the various block rows 
2676:          (possibly different for each block row) or PETSC_NULL

2678:    Output Parameter:
2679: .  A - the matrix 

2681:    Options Database Keys:
2682: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2683:                      block calculations (much slower)
2684: .    -mat_block_size - size of the blocks to use

2686:    Level: intermediate

2688:    Notes:
2689:    The number of rows and columns must be divisible by blocksize.

2691:    If the nnz parameter is given then the nz parameter is ignored

2693:    A nonzero block is any block that as 1 or more nonzeros in it

2695:    The block AIJ format is fully compatible with standard Fortran 77
2696:    storage.  That is, the stored row and column indices can begin at
2697:    either one (as in Fortran) or zero.  See the users' manual for details.

2699:    Specify the preallocated storage with either nz or nnz (not both).
2700:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2701:    allocation.  For additional details, see the users manual chapter on
2702:    matrices.

2704: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2705: @*/
2706: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2707: {
2709: 
2711:   MatCreate(comm,A);
2712:   MatSetSizes(*A,m,n,m,n);
2713:   MatSetType(*A,MATSEQBAIJ);
2714:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2715:   return(0);
2716: }

2720: /*@C
2721:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2722:    per row in the matrix. For good matrix assembly performance the
2723:    user should preallocate the matrix storage by setting the parameter nz
2724:    (or the array nnz).  By setting these parameters accurately, performance
2725:    during matrix assembly can be increased by more than a factor of 50.

2727:    Collective on MPI_Comm

2729:    Input Parameters:
2730: +  A - the matrix
2731: .  bs - size of block
2732: .  nz - number of block nonzeros per block row (same for all rows)
2733: -  nnz - array containing the number of block nonzeros in the various block rows 
2734:          (possibly different for each block row) or PETSC_NULL

2736:    Options Database Keys:
2737: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2738:                      block calculations (much slower)
2739: .    -mat_block_size - size of the blocks to use

2741:    Level: intermediate

2743:    Notes:
2744:    If the nnz parameter is given then the nz parameter is ignored

2746:    The block AIJ format is fully compatible with standard Fortran 77
2747:    storage.  That is, the stored row and column indices can begin at
2748:    either one (as in Fortran) or zero.  See the users' manual for details.

2750:    Specify the preallocated storage with either nz or nnz (not both).
2751:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2752:    allocation.  For additional details, see the users manual chapter on
2753:    matrices.

2755: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2756: @*/
2757: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2758: {
2759:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

2762:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2763:   if (f) {
2764:     (*f)(B,bs,nz,nnz);
2765:   }
2766:   return(0);
2767: }

2771: /*@
2772:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
2773:               (upper triangular entries in CSR format) provided by the user.

2775:      Collective on MPI_Comm

2777:    Input Parameters:
2778: +  comm - must be an MPI communicator of size 1
2779: .  bs - size of block
2780: .  m - number of rows
2781: .  n - number of columns
2782: .  i - row indices
2783: .  j - column indices
2784: -  a - matrix values

2786:    Output Parameter:
2787: .  mat - the matrix

2789:    Level: intermediate

2791:    Notes:
2792:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2793:     once the matrix is destroyed

2795:        You cannot set new nonzero locations into this matrix, that will generate an error.

2797:        The i and j indices are 0 based

2799: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

2801: @*/
2802: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2803: {
2805:   PetscInt       ii;
2806:   Mat_SeqBAIJ    *baij;

2809:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2810:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2811: 
2812:   MatCreate(comm,mat);
2813:   MatSetSizes(*mat,m,n,m,n);
2814:   MatSetType(*mat,MATSEQBAIJ);
2815:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2816:   baij = (Mat_SeqBAIJ*)(*mat)->data;
2817:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);

2819:   baij->i = i;
2820:   baij->j = j;
2821:   baij->a = a;
2822:   baij->singlemalloc = PETSC_FALSE;
2823:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2824:   baij->free_a       = PETSC_FALSE;
2825:   baij->free_ij       = PETSC_FALSE;

2827:   for (ii=0; ii<m; ii++) {
2828:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2829: #if defined(PETSC_USE_DEBUG)
2830:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2831: #endif    
2832:   }
2833: #if defined(PETSC_USE_DEBUG)
2834:   for (ii=0; ii<baij->i[m]; ii++) {
2835:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2836:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2837:   }
2838: #endif    

2840:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2841:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2842:   return(0);
2843: }