Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: /*@C
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
54: if (a->idiagvalid) return(0);
55: MatMarkDiagonal_SeqBAIJ(A);
56: diag_offset = a->diag;
57: if (!a->idiag) {
58: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69: Kernel_A_gets_inverse_A_2(diag);
70: diag += 4;
71: mdiag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82: mdiag[8] = odiag[8];
83: Kernel_A_gets_inverse_A_3(diag);
84: diag += 9;
85: mdiag += 9;
86: }
87: break;
88: case 4:
89: for (i=0; i<mbs; i++) {
90: odiag = v + 16*diag_offset[i];
91: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
92: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
93: Kernel_A_gets_inverse_A_4(diag);
94: diag += 16;
95: mdiag += 16;
96: }
97: break;
98: case 5:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 25*diag_offset[i];
101: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_5(diag);
104: diag += 25;
105: mdiag += 25;
106: }
107: break;
108: default:
109: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110: }
111: a->idiagvalid = PETSC_TRUE;
112: return(0);
113: }
118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
121: PetscScalar *x,x1,x2,s1,s2;
122: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
123: PetscErrorCode ierr;
124: PetscInt m = a->mbs,i,i2,nz,idx;
125: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
128: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129: its = its*lits;
130: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
136: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
138: diag = a->diag;
139: idiag = a->idiag;
140: VecGetArray(xx,&x);
141: VecGetArray(bb,(PetscScalar**)&b);
143: if (flag & SOR_ZERO_INITIAL_GUESS) {
144: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147: i2 = 2;
148: idiag += 4;
149: for (i=1; i<m; i++) {
150: v = aa + 4*ai[i];
151: vi = aj + ai[i];
152: nz = diag[i] - ai[i];
153: s1 = b[i2]; s2 = b[i2+1];
154: while (nz--) {
155: idx = 2*(*vi++);
156: x1 = x[idx]; x2 = x[1+idx];
157: s1 -= v[0]*x1 + v[2]*x2;
158: s2 -= v[1]*x1 + v[3]*x2;
159: v += 4;
160: }
161: x[i2] = idiag[0]*s1 + idiag[2]*s2;
162: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163: idiag += 4;
164: i2 += 2;
165: }
166: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167: PetscLogFlops(4*(a->nz));
168: }
169: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171: i2 = 0;
172: mdiag = a->idiag+4*a->mbs;
173: for (i=0; i<m; i++) {
174: x1 = x[i2]; x2 = x[i2+1];
175: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
176: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177: mdiag += 4;
178: i2 += 2;
179: }
180: PetscLogFlops(6*m);
181: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183: }
184: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185: idiag = a->idiag+4*a->mbs - 4;
186: i2 = 2*m - 2;
187: x1 = x[i2]; x2 = x[i2+1];
188: x[i2] = idiag[0]*x1 + idiag[2]*x2;
189: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190: idiag -= 4;
191: i2 -= 2;
192: for (i=m-2; i>=0; i--) {
193: v = aa + 4*(diag[i]+1);
194: vi = aj + diag[i] + 1;
195: nz = ai[i+1] - diag[i] - 1;
196: s1 = x[i2]; s2 = x[i2+1];
197: while (nz--) {
198: idx = 2*(*vi++);
199: x1 = x[idx]; x2 = x[1+idx];
200: s1 -= v[0]*x1 + v[2]*x2;
201: s2 -= v[1]*x1 + v[3]*x2;
202: v += 4;
203: }
204: x[i2] = idiag[0]*s1 + idiag[2]*s2;
205: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206: idiag -= 4;
207: i2 -= 2;
208: }
209: PetscLogFlops(4*(a->nz));
210: }
211: } else {
212: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213: }
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(bb,(PetscScalar**)&b);
216: return(0);
217: }
221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
224: PetscScalar *x,x1,x2,x3,s1,s2,s3;
225: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
226: PetscErrorCode ierr;
227: PetscInt m = a->mbs,i,i2,nz,idx;
228: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
231: its = its*lits;
232: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
240: diag = a->diag;
241: idiag = a->idiag;
242: VecGetArray(xx,&x);
243: VecGetArray(bb,(PetscScalar**)&b);
245: if (flag & SOR_ZERO_INITIAL_GUESS) {
246: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250: i2 = 3;
251: idiag += 9;
252: for (i=1; i<m; i++) {
253: v = aa + 9*ai[i];
254: vi = aj + ai[i];
255: nz = diag[i] - ai[i];
256: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257: while (nz--) {
258: idx = 3*(*vi++);
259: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263: v += 9;
264: }
265: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268: idiag += 9;
269: i2 += 3;
270: }
271: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272: PetscLogFlops(9*(a->nz));
273: }
274: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276: i2 = 0;
277: mdiag = a->idiag+9*a->mbs;
278: for (i=0; i<m; i++) {
279: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283: mdiag += 9;
284: i2 += 3;
285: }
286: PetscLogFlops(15*m);
287: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
289: }
290: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291: idiag = a->idiag+9*a->mbs - 9;
292: i2 = 3*m - 3;
293: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297: idiag -= 9;
298: i2 -= 3;
299: for (i=m-2; i>=0; i--) {
300: v = aa + 9*(diag[i]+1);
301: vi = aj + diag[i] + 1;
302: nz = ai[i+1] - diag[i] - 1;
303: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304: while (nz--) {
305: idx = 3*(*vi++);
306: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310: v += 9;
311: }
312: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315: idiag -= 9;
316: i2 -= 3;
317: }
318: PetscLogFlops(9*(a->nz));
319: }
320: } else {
321: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322: }
323: VecRestoreArray(xx,&x);
324: VecRestoreArray(bb,(PetscScalar**)&b);
325: return(0);
326: }
330: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331: {
332: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
333: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
334: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
335: PetscErrorCode ierr;
336: PetscInt m = a->mbs,i,i2,nz,idx;
337: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
340: its = its*lits;
341: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
347: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
349: diag = a->diag;
350: idiag = a->idiag;
351: VecGetArray(xx,&x);
352: VecGetArray(bb,(PetscScalar**)&b);
354: if (flag & SOR_ZERO_INITIAL_GUESS) {
355: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
357: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
358: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360: i2 = 4;
361: idiag += 16;
362: for (i=1; i<m; i++) {
363: v = aa + 16*ai[i];
364: vi = aj + ai[i];
365: nz = diag[i] - ai[i];
366: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367: while (nz--) {
368: idx = 4*(*vi++);
369: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
371: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
372: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374: v += 16;
375: }
376: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
377: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
378: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380: idiag += 16;
381: i2 += 4;
382: }
383: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384: PetscLogFlops(16*(a->nz));
385: }
386: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388: i2 = 0;
389: mdiag = a->idiag+16*a->mbs;
390: for (i=0; i<m; i++) {
391: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
393: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
394: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396: mdiag += 16;
397: i2 += 4;
398: }
399: PetscLogFlops(28*m);
400: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
402: }
403: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404: idiag = a->idiag+16*a->mbs - 16;
405: i2 = 4*m - 4;
406: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
408: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
409: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411: idiag -= 16;
412: i2 -= 4;
413: for (i=m-2; i>=0; i--) {
414: v = aa + 16*(diag[i]+1);
415: vi = aj + diag[i] + 1;
416: nz = ai[i+1] - diag[i] - 1;
417: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418: while (nz--) {
419: idx = 4*(*vi++);
420: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
422: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
423: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425: v += 16;
426: }
427: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
428: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
429: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431: idiag -= 16;
432: i2 -= 4;
433: }
434: PetscLogFlops(16*(a->nz));
435: }
436: } else {
437: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438: }
439: VecRestoreArray(xx,&x);
440: VecRestoreArray(bb,(PetscScalar**)&b);
441: return(0);
442: }
446: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447: {
448: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
449: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
451: PetscErrorCode ierr;
452: PetscInt m = a->mbs,i,i2,nz,idx;
453: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
456: its = its*lits;
457: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
463: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
465: diag = a->diag;
466: idiag = a->idiag;
467: VecGetArray(xx,&x);
468: VecGetArray(bb,(PetscScalar**)&b);
470: if (flag & SOR_ZERO_INITIAL_GUESS) {
471: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477: i2 = 5;
478: idiag += 25;
479: for (i=1; i<m; i++) {
480: v = aa + 25*ai[i];
481: vi = aj + ai[i];
482: nz = diag[i] - ai[i];
483: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484: while (nz--) {
485: idx = 5*(*vi++);
486: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492: v += 25;
493: }
494: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499: idiag += 25;
500: i2 += 5;
501: }
502: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503: PetscLogFlops(25*(a->nz));
504: }
505: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507: i2 = 0;
508: mdiag = a->idiag+25*a->mbs;
509: for (i=0; i<m; i++) {
510: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516: mdiag += 25;
517: i2 += 5;
518: }
519: PetscLogFlops(45*m);
520: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
522: }
523: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524: idiag = a->idiag+25*a->mbs - 25;
525: i2 = 5*m - 5;
526: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532: idiag -= 25;
533: i2 -= 5;
534: for (i=m-2; i>=0; i--) {
535: v = aa + 25*(diag[i]+1);
536: vi = aj + diag[i] + 1;
537: nz = ai[i+1] - diag[i] - 1;
538: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539: while (nz--) {
540: idx = 5*(*vi++);
541: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547: v += 25;
548: }
549: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554: idiag -= 25;
555: i2 -= 5;
556: }
557: PetscLogFlops(25*(a->nz));
558: }
559: } else {
560: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561: }
562: VecRestoreArray(xx,&x);
563: VecRestoreArray(bb,(PetscScalar**)&b);
564: return(0);
565: }
567: /*
568: Special version for direct calls from Fortran (Used in PETSc-fun3d)
569: */
570: #if defined(PETSC_HAVE_FORTRAN_CAPS)
571: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573: #define matsetvaluesblocked4_ matsetvaluesblocked4
574: #endif
579: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580: {
581: Mat A = *AA;
582: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
583: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584: PetscInt *ai=a->i,*ailen=a->ilen;
585: PetscInt *aj=a->j,stepval,lastcol = -1;
586: const PetscScalar *value = v;
587: MatScalar *ap,*aa = a->a,*bap;
590: if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591: stepval = (n-1)*4;
592: for (k=0; k<m; k++) { /* loop over added rows */
593: row = im[k];
594: rp = aj + ai[row];
595: ap = aa + 16*ai[row];
596: nrow = ailen[row];
597: low = 0;
598: high = nrow;
599: for (l=0; l<n; l++) { /* loop over added columns */
600: col = in[l];
601: if (col <= lastcol) low = 0; else high = nrow;
602: lastcol = col;
603: value = v + k*(stepval+4 + l)*4;
604: while (high-low > 7) {
605: t = (low+high)/2;
606: if (rp[t] > col) high = t;
607: else low = t;
608: }
609: for (i=low; i<high; i++) {
610: if (rp[i] > col) break;
611: if (rp[i] == col) {
612: bap = ap + 16*i;
613: for (ii=0; ii<4; ii++,value+=stepval) {
614: for (jj=ii; jj<16; jj+=4) {
615: bap[jj] += *value++;
616: }
617: }
618: goto noinsert2;
619: }
620: }
621: N = nrow++ - 1;
622: high++; /* added new column index thus must search to one higher than before */
623: /* shift up all the later entries in this row */
624: for (ii=N; ii>=i; ii--) {
625: rp[ii+1] = rp[ii];
626: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627: }
628: if (N >= i) {
629: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630: }
631: rp[i] = col;
632: bap = ap + 16*i;
633: for (ii=0; ii<4; ii++,value+=stepval) {
634: for (jj=ii; jj<16; jj+=4) {
635: bap[jj] = *value++;
636: }
637: }
638: noinsert2:;
639: low = i;
640: }
641: ailen[row] = nrow;
642: }
643: PetscFunctionReturnVoid();
644: }
647: #if defined(PETSC_HAVE_FORTRAN_CAPS)
648: #define matsetvalues4_ MATSETVALUES4
649: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650: #define matsetvalues4_ matsetvalues4
651: #endif
656: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657: {
658: Mat A = *AA;
659: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661: PetscInt *ai=a->i,*ailen=a->ilen;
662: PetscInt *aj=a->j,brow,bcol;
663: PetscInt ridx,cidx,lastcol = -1;
664: MatScalar *ap,value,*aa=a->a,*bap;
665:
667: for (k=0; k<m; k++) { /* loop over added rows */
668: row = im[k]; brow = row/4;
669: rp = aj + ai[brow];
670: ap = aa + 16*ai[brow];
671: nrow = ailen[brow];
672: low = 0;
673: high = nrow;
674: for (l=0; l<n; l++) { /* loop over added columns */
675: col = in[l]; bcol = col/4;
676: ridx = row % 4; cidx = col % 4;
677: value = v[l + k*n];
678: if (col <= lastcol) low = 0; else high = nrow;
679: lastcol = col;
680: while (high-low > 7) {
681: t = (low+high)/2;
682: if (rp[t] > bcol) high = t;
683: else low = t;
684: }
685: for (i=low; i<high; i++) {
686: if (rp[i] > bcol) break;
687: if (rp[i] == bcol) {
688: bap = ap + 16*i + 4*cidx + ridx;
689: *bap += value;
690: goto noinsert1;
691: }
692: }
693: N = nrow++ - 1;
694: high++; /* added new column thus must search to one higher than before */
695: /* shift up all the later entries in this row */
696: for (ii=N; ii>=i; ii--) {
697: rp[ii+1] = rp[ii];
698: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699: }
700: if (N>=i) {
701: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702: }
703: rp[i] = bcol;
704: ap[16*i + 4*cidx + ridx] = value;
705: noinsert1:;
706: low = i;
707: }
708: ailen[brow] = nrow;
709: }
710: PetscFunctionReturnVoid();
711: }
714: /* UGLY, ugly, ugly
715: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718: converts the entries into single precision and then calls ..._MatScalar() to put them
719: into the single precision data structures.
720: */
721: #if defined(PETSC_USE_MAT_SINGLE)
722: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723: #else
724: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725: #endif
727: #define CHUNKSIZE 10
729: /*
730: Checks for missing diagonals
731: */
734: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735: {
736: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
738: PetscInt *diag,*jj = a->j,i;
741: MatMarkDiagonal_SeqBAIJ(A);
742: diag = a->diag;
743: for (i=0; i<a->mbs; i++) {
744: if (jj[diag[i]] != i) {
745: SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746: }
747: }
748: return(0);
749: }
753: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754: {
755: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
757: PetscInt i,j,m = a->mbs;
760: if (!a->diag) {
761: PetscMalloc(m*sizeof(PetscInt),&a->diag);
762: }
763: for (i=0; i<m; i++) {
764: a->diag[i] = a->i[i+1];
765: for (j=a->i[i]; j<a->i[i+1]; j++) {
766: if (a->j[j] == i) {
767: a->diag[i] = j;
768: break;
769: }
770: }
771: }
772: return(0);
773: }
776: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
780: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781: {
782: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
784: PetscInt n = a->mbs,i;
787: *nn = n;
788: if (!ia) return(0);
789: if (symmetric) {
790: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
791: } else if (oshift == 1) {
792: /* temporarily add 1 to i and j indices */
793: PetscInt nz = a->i[n];
794: for (i=0; i<nz; i++) a->j[i]++;
795: for (i=0; i<n+1; i++) a->i[i]++;
796: *ia = a->i; *ja = a->j;
797: } else {
798: *ia = a->i; *ja = a->j;
799: }
801: return(0);
802: }
806: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807: {
808: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
810: PetscInt i,n = a->mbs;
813: if (!ia) return(0);
814: if (symmetric) {
815: PetscFree(*ia);
816: PetscFree(*ja);
817: } else if (oshift == 1) {
818: PetscInt nz = a->i[n]-1;
819: for (i=0; i<nz; i++) a->j[i]--;
820: for (i=0; i<n+1; i++) a->i[i]--;
821: }
822: return(0);
823: }
827: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828: {
829: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
833: #if defined(PETSC_USE_LOG)
834: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835: #endif
836: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
837: if (a->row) {
838: ISDestroy(a->row);
839: }
840: if (a->col) {
841: ISDestroy(a->col);
842: }
843: PetscFree(a->diag);
844: PetscFree(a->idiag);
845: PetscFree2(a->imax,a->ilen);
846: PetscFree(a->solve_work);
847: PetscFree(a->mult_work);
848: if (a->icol) {ISDestroy(a->icol);}
849: PetscFree(a->saved_values);
850: #if defined(PETSC_USE_MAT_SINGLE)
851: PetscFree(a->setvaluescopy);
852: #endif
853: PetscFree(a->xtoy);
854: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
856: PetscFree(a);
858: PetscObjectChangeTypeName((PetscObject)A,0);
859: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
860: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
861: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
862: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
863: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
864: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
865: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
866: return(0);
867: }
871: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872: {
873: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
877: switch (op) {
878: case MAT_ROW_ORIENTED:
879: a->roworiented = PETSC_TRUE;
880: break;
881: case MAT_COLUMN_ORIENTED:
882: a->roworiented = PETSC_FALSE;
883: break;
884: case MAT_COLUMNS_SORTED:
885: a->sorted = PETSC_TRUE;
886: break;
887: case MAT_COLUMNS_UNSORTED:
888: a->sorted = PETSC_FALSE;
889: break;
890: case MAT_KEEP_ZEROED_ROWS:
891: a->keepzeroedrows = PETSC_TRUE;
892: break;
893: case MAT_NO_NEW_NONZERO_LOCATIONS:
894: a->nonew = 1;
895: break;
896: case MAT_NEW_NONZERO_LOCATION_ERR:
897: a->nonew = -1;
898: break;
899: case MAT_NEW_NONZERO_ALLOCATION_ERR:
900: a->nonew = -2;
901: break;
902: case MAT_YES_NEW_NONZERO_LOCATIONS:
903: a->nonew = 0;
904: break;
905: case MAT_ROWS_SORTED:
906: case MAT_ROWS_UNSORTED:
907: case MAT_YES_NEW_DIAGONALS:
908: case MAT_IGNORE_OFF_PROC_ENTRIES:
909: case MAT_USE_HASH_TABLE:
910: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
911: break;
912: case MAT_NO_NEW_DIAGONALS:
913: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914: case MAT_SYMMETRIC:
915: case MAT_STRUCTURALLY_SYMMETRIC:
916: case MAT_NOT_SYMMETRIC:
917: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918: case MAT_HERMITIAN:
919: case MAT_NOT_HERMITIAN:
920: case MAT_SYMMETRY_ETERNAL:
921: case MAT_NOT_SYMMETRY_ETERNAL:
922: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
923: break;
924: default:
925: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
926: }
927: return(0);
928: }
932: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
933: {
934: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
936: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
937: MatScalar *aa,*aa_i;
938: PetscScalar *v_i;
941: bs = A->rmap.bs;
942: ai = a->i;
943: aj = a->j;
944: aa = a->a;
945: bs2 = a->bs2;
946:
947: if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
948:
949: bn = row/bs; /* Block number */
950: bp = row % bs; /* Block Position */
951: M = ai[bn+1] - ai[bn];
952: *nz = bs*M;
953:
954: if (v) {
955: *v = 0;
956: if (*nz) {
957: PetscMalloc((*nz)*sizeof(PetscScalar),v);
958: for (i=0; i<M; i++) { /* for each block in the block row */
959: v_i = *v + i*bs;
960: aa_i = aa + bs2*(ai[bn] + i);
961: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
962: }
963: }
964: }
966: if (idx) {
967: *idx = 0;
968: if (*nz) {
969: PetscMalloc((*nz)*sizeof(PetscInt),idx);
970: for (i=0; i<M; i++) { /* for each block in the block row */
971: idx_i = *idx + i*bs;
972: itmp = bs*aj[ai[bn] + i];
973: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
974: }
975: }
976: }
977: return(0);
978: }
982: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983: {
987: if (idx) {PetscFree(*idx);}
988: if (v) {PetscFree(*v);}
989: return(0);
990: }
994: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
995: {
996: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
997: Mat C;
999: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1000: PetscInt *rows,*cols,bs2=a->bs2;
1001: PetscScalar *array;
1004: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1005: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1006: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1008: #if defined(PETSC_USE_MAT_SINGLE)
1009: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1010: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1011: #else
1012: array = a->a;
1013: #endif
1015: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1016: MatCreate(A->comm,&C);
1017: MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1018: MatSetType(C,A->type_name);
1019: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1020: PetscFree(col);
1021: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1022: cols = rows + bs;
1023: for (i=0; i<mbs; i++) {
1024: cols[0] = i*bs;
1025: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1026: len = ai[i+1] - ai[i];
1027: for (j=0; j<len; j++) {
1028: rows[0] = (*aj++)*bs;
1029: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1030: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1031: array += bs2;
1032: }
1033: }
1034: PetscFree(rows);
1035: #if defined(PETSC_USE_MAT_SINGLE)
1036: PetscFree(array);
1037: #endif
1038:
1039: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1040: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1041:
1042: if (B) {
1043: *B = C;
1044: } else {
1045: MatHeaderCopy(A,C);
1046: }
1047: return(0);
1048: }
1052: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1053: {
1054: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1056: PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1057: int fd;
1058: PetscScalar *aa;
1059: FILE *file;
1062: PetscViewerBinaryGetDescriptor(viewer,&fd);
1063: PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1064: col_lens[0] = MAT_FILE_COOKIE;
1066: col_lens[1] = A->rmap.N;
1067: col_lens[2] = A->cmap.n;
1068: col_lens[3] = a->nz*bs2;
1070: /* store lengths of each row and write (including header) to file */
1071: count = 0;
1072: for (i=0; i<a->mbs; i++) {
1073: for (j=0; j<bs; j++) {
1074: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1075: }
1076: }
1077: PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1078: PetscFree(col_lens);
1080: /* store column indices (zero start index) */
1081: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1082: count = 0;
1083: for (i=0; i<a->mbs; i++) {
1084: for (j=0; j<bs; j++) {
1085: for (k=a->i[i]; k<a->i[i+1]; k++) {
1086: for (l=0; l<bs; l++) {
1087: jj[count++] = bs*a->j[k] + l;
1088: }
1089: }
1090: }
1091: }
1092: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1093: PetscFree(jj);
1095: /* store nonzero values */
1096: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1097: count = 0;
1098: for (i=0; i<a->mbs; i++) {
1099: for (j=0; j<bs; j++) {
1100: for (k=a->i[i]; k<a->i[i+1]; k++) {
1101: for (l=0; l<bs; l++) {
1102: aa[count++] = a->a[bs2*k + l*bs + j];
1103: }
1104: }
1105: }
1106: }
1107: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1108: PetscFree(aa);
1110: PetscViewerBinaryGetInfoPointer(viewer,&file);
1111: if (file) {
1112: fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1113: }
1114: return(0);
1115: }
1119: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1120: {
1121: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1122: PetscErrorCode ierr;
1123: PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1124: PetscViewerFormat format;
1127: PetscViewerGetFormat(viewer,&format);
1128: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1129: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1130: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1131: Mat aij;
1132: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1133: MatView(aij,viewer);
1134: MatDestroy(aij);
1135: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1136: return(0);
1137: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1138: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1139: for (i=0; i<a->mbs; i++) {
1140: for (j=0; j<bs; j++) {
1141: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1142: for (k=a->i[i]; k<a->i[i+1]; k++) {
1143: for (l=0; l<bs; l++) {
1144: #if defined(PETSC_USE_COMPLEX)
1145: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1147: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1148: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1150: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1151: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1153: }
1154: #else
1155: if (a->a[bs2*k + l*bs + j] != 0.0) {
1156: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1157: }
1158: #endif
1159: }
1160: }
1161: PetscViewerASCIIPrintf(viewer,"\n");
1162: }
1163: }
1164: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1165: } else {
1166: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1167: for (i=0; i<a->mbs; i++) {
1168: for (j=0; j<bs; j++) {
1169: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1170: for (k=a->i[i]; k<a->i[i+1]; k++) {
1171: for (l=0; l<bs; l++) {
1172: #if defined(PETSC_USE_COMPLEX)
1173: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1174: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1175: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1176: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1177: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1178: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1179: } else {
1180: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1181: }
1182: #else
1183: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1184: #endif
1185: }
1186: }
1187: PetscViewerASCIIPrintf(viewer,"\n");
1188: }
1189: }
1190: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1191: }
1192: PetscViewerFlush(viewer);
1193: return(0);
1194: }
1198: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1199: {
1200: Mat A = (Mat) Aa;
1201: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1203: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1204: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1205: MatScalar *aa;
1206: PetscViewer viewer;
1210: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1211: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1213: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1215: /* loop over matrix elements drawing boxes */
1216: color = PETSC_DRAW_BLUE;
1217: for (i=0,row=0; i<mbs; i++,row+=bs) {
1218: for (j=a->i[i]; j<a->i[i+1]; j++) {
1219: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1220: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1221: aa = a->a + j*bs2;
1222: for (k=0; k<bs; k++) {
1223: for (l=0; l<bs; l++) {
1224: if (PetscRealPart(*aa++) >= 0.) continue;
1225: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1226: }
1227: }
1228: }
1229: }
1230: color = PETSC_DRAW_CYAN;
1231: for (i=0,row=0; i<mbs; i++,row+=bs) {
1232: for (j=a->i[i]; j<a->i[i+1]; j++) {
1233: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1234: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1235: aa = a->a + j*bs2;
1236: for (k=0; k<bs; k++) {
1237: for (l=0; l<bs; l++) {
1238: if (PetscRealPart(*aa++) != 0.) continue;
1239: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1240: }
1241: }
1242: }
1243: }
1245: color = PETSC_DRAW_RED;
1246: for (i=0,row=0; i<mbs; i++,row+=bs) {
1247: for (j=a->i[i]; j<a->i[i+1]; j++) {
1248: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250: aa = a->a + j*bs2;
1251: for (k=0; k<bs; k++) {
1252: for (l=0; l<bs; l++) {
1253: if (PetscRealPart(*aa++) <= 0.) continue;
1254: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1255: }
1256: }
1257: }
1258: }
1259: return(0);
1260: }
1264: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1265: {
1267: PetscReal xl,yl,xr,yr,w,h;
1268: PetscDraw draw;
1269: PetscTruth isnull;
1273: PetscViewerDrawGetDraw(viewer,0,&draw);
1274: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1276: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1277: xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1278: xr += w; yr += h; xl = -w; yl = -h;
1279: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1280: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1281: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1282: return(0);
1283: }
1287: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1288: {
1290: PetscTruth iascii,isbinary,isdraw;
1293: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1294: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1295: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1296: if (iascii){
1297: MatView_SeqBAIJ_ASCII(A,viewer);
1298: } else if (isbinary) {
1299: MatView_SeqBAIJ_Binary(A,viewer);
1300: } else if (isdraw) {
1301: MatView_SeqBAIJ_Draw(A,viewer);
1302: } else {
1303: Mat B;
1304: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1305: MatView(B,viewer);
1306: MatDestroy(B);
1307: }
1308: return(0);
1309: }
1314: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1315: {
1316: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1318: PetscInt *ai = a->i,*ailen = a->ilen;
1319: PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1320: MatScalar *ap,*aa = a->a,zero = 0.0;
1323: for (k=0; k<m; k++) { /* loop over rows */
1324: row = im[k]; brow = row/bs;
1325: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1326: if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1327: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1328: nrow = ailen[brow];
1329: for (l=0; l<n; l++) { /* loop over columns */
1330: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1331: if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1332: col = in[l] ;
1333: bcol = col/bs;
1334: cidx = col%bs;
1335: ridx = row%bs;
1336: high = nrow;
1337: low = 0; /* assume unsorted */
1338: while (high-low > 5) {
1339: t = (low+high)/2;
1340: if (rp[t] > bcol) high = t;
1341: else low = t;
1342: }
1343: for (i=low; i<high; i++) {
1344: if (rp[i] > bcol) break;
1345: if (rp[i] == bcol) {
1346: *v++ = ap[bs2*i+bs*cidx+ridx];
1347: goto finished;
1348: }
1349: }
1350: *v++ = zero;
1351: finished:;
1352: }
1353: }
1354: return(0);
1355: }
1357: #if defined(PETSC_USE_MAT_SINGLE)
1360: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1361: {
1362: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1364: PetscInt i,N = m*n*b->bs2;
1365: MatScalar *vsingle;
1368: if (N > b->setvalueslen) {
1369: PetscFree(b->setvaluescopy);
1370: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1371: b->setvalueslen = N;
1372: }
1373: vsingle = b->setvaluescopy;
1374: for (i=0; i<N; i++) {
1375: vsingle[i] = v[i];
1376: }
1377: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1378: return(0);
1379: }
1380: #endif
1385: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1386: {
1387: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1388: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1389: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1390: PetscErrorCode ierr;
1391: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1392: PetscTruth roworiented=a->roworiented;
1393: const MatScalar *value = v;
1394: MatScalar *ap,*aa = a->a,*bap;
1397: if (roworiented) {
1398: stepval = (n-1)*bs;
1399: } else {
1400: stepval = (m-1)*bs;
1401: }
1402: for (k=0; k<m; k++) { /* loop over added rows */
1403: row = im[k];
1404: if (row < 0) continue;
1405: #if defined(PETSC_USE_DEBUG)
1406: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1407: #endif
1408: rp = aj + ai[row];
1409: ap = aa + bs2*ai[row];
1410: rmax = imax[row];
1411: nrow = ailen[row];
1412: low = 0;
1413: high = nrow;
1414: for (l=0; l<n; l++) { /* loop over added columns */
1415: if (in[l] < 0) continue;
1416: #if defined(PETSC_USE_DEBUG)
1417: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1418: #endif
1419: col = in[l];
1420: if (roworiented) {
1421: value = v + k*(stepval+bs)*bs + l*bs;
1422: } else {
1423: value = v + l*(stepval+bs)*bs + k*bs;
1424: }
1425: if (col <= lastcol) low = 0; else high = nrow;
1426: lastcol = col;
1427: while (high-low > 7) {
1428: t = (low+high)/2;
1429: if (rp[t] > col) high = t;
1430: else low = t;
1431: }
1432: for (i=low; i<high; i++) {
1433: if (rp[i] > col) break;
1434: if (rp[i] == col) {
1435: bap = ap + bs2*i;
1436: if (roworiented) {
1437: if (is == ADD_VALUES) {
1438: for (ii=0; ii<bs; ii++,value+=stepval) {
1439: for (jj=ii; jj<bs2; jj+=bs) {
1440: bap[jj] += *value++;
1441: }
1442: }
1443: } else {
1444: for (ii=0; ii<bs; ii++,value+=stepval) {
1445: for (jj=ii; jj<bs2; jj+=bs) {
1446: bap[jj] = *value++;
1447: }
1448: }
1449: }
1450: } else {
1451: if (is == ADD_VALUES) {
1452: for (ii=0; ii<bs; ii++,value+=stepval) {
1453: for (jj=0; jj<bs; jj++) {
1454: *bap++ += *value++;
1455: }
1456: }
1457: } else {
1458: for (ii=0; ii<bs; ii++,value+=stepval) {
1459: for (jj=0; jj<bs; jj++) {
1460: *bap++ = *value++;
1461: }
1462: }
1463: }
1464: }
1465: goto noinsert2;
1466: }
1467: }
1468: if (nonew == 1) goto noinsert2;
1469: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1470: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1471: N = nrow++ - 1; high++;
1472: /* shift up all the later entries in this row */
1473: for (ii=N; ii>=i; ii--) {
1474: rp[ii+1] = rp[ii];
1475: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1476: }
1477: if (N >= i) {
1478: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1479: }
1480: rp[i] = col;
1481: bap = ap + bs2*i;
1482: if (roworiented) {
1483: for (ii=0; ii<bs; ii++,value+=stepval) {
1484: for (jj=ii; jj<bs2; jj+=bs) {
1485: bap[jj] = *value++;
1486: }
1487: }
1488: } else {
1489: for (ii=0; ii<bs; ii++,value+=stepval) {
1490: for (jj=0; jj<bs; jj++) {
1491: *bap++ = *value++;
1492: }
1493: }
1494: }
1495: noinsert2:;
1496: low = i;
1497: }
1498: ailen[row] = nrow;
1499: }
1500: return(0);
1501: }
1505: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506: {
1507: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1508: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509: PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen;
1511: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512: MatScalar *aa = a->a,*ap;
1513: PetscReal ratio=0.6;
1516: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1518: if (m) rmax = ailen[0];
1519: for (i=1; i<mbs; i++) {
1520: /* move each row back by the amount of empty slots (fshift) before it*/
1521: fshift += imax[i-1] - ailen[i-1];
1522: rmax = PetscMax(rmax,ailen[i]);
1523: if (fshift) {
1524: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525: N = ailen[i];
1526: for (j=0; j<N; j++) {
1527: ip[j-fshift] = ip[j];
1528: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1529: }
1530: }
1531: ai[i] = ai[i-1] + ailen[i-1];
1532: }
1533: if (mbs) {
1534: fshift += imax[mbs-1] - ailen[mbs-1];
1535: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536: }
1537: /* reset ilen and imax for each row */
1538: for (i=0; i<mbs; i++) {
1539: ailen[i] = imax[i] = ai[i+1] - ai[i];
1540: }
1541: a->nz = ai[mbs];
1543: /* diagonals may have moved, so kill the diagonal pointers */
1544: a->idiagvalid = PETSC_FALSE;
1545: if (fshift && a->diag) {
1546: PetscFree(a->diag);
1547: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1548: a->diag = 0;
1549: }
1550: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1551: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1552: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1553: a->reallocs = 0;
1554: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1556: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557: if (a->compressedrow.use){
1558: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1559: }
1561: A->same_nonzero = PETSC_TRUE;
1562: return(0);
1563: }
1565: /*
1566: This function returns an array of flags which indicate the locations of contiguous
1567: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1568: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569: Assume: sizes should be long enough to hold all the values.
1570: */
1573: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574: {
1575: PetscInt i,j,k,row;
1576: PetscTruth flg;
1579: for (i=0,j=0; i<n; j++) {
1580: row = idx[i];
1581: if (row%bs!=0) { /* Not the begining of a block */
1582: sizes[j] = 1;
1583: i++;
1584: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1586: i++;
1587: } else { /* Begining of the block, so check if the complete block exists */
1588: flg = PETSC_TRUE;
1589: for (k=1; k<bs; k++) {
1590: if (row+k != idx[i+k]) { /* break in the block */
1591: flg = PETSC_FALSE;
1592: break;
1593: }
1594: }
1595: if (flg) { /* No break in the bs */
1596: sizes[j] = bs;
1597: i+= bs;
1598: } else {
1599: sizes[j] = 1;
1600: i++;
1601: }
1602: }
1603: }
1604: *bs_max = j;
1605: return(0);
1606: }
1607:
1610: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1611: {
1612: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1614: PetscInt i,j,k,count,*rows;
1615: PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1616: PetscScalar zero = 0.0;
1617: MatScalar *aa;
1620: /* Make a copy of the IS and sort it */
1621: /* allocate memory for rows,sizes */
1622: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1623: sizes = rows + is_n;
1625: /* copy IS values to rows, and sort them */
1626: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1627: PetscSortInt(is_n,rows);
1628: if (baij->keepzeroedrows) {
1629: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1630: bs_max = is_n;
1631: A->same_nonzero = PETSC_TRUE;
1632: } else {
1633: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1634: A->same_nonzero = PETSC_FALSE;
1635: }
1637: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638: row = rows[j];
1639: if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642: if (sizes[i] == bs && !baij->keepzeroedrows) {
1643: if (diag != 0.0) {
1644: if (baij->ilen[row/bs] > 0) {
1645: baij->ilen[row/bs] = 1;
1646: baij->j[baij->i[row/bs]] = row/bs;
1647: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1648: }
1649: /* Now insert all the diagonal values for this bs */
1650: for (k=0; k<bs; k++) {
1651: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1652: }
1653: } else { /* (diag == 0.0) */
1654: baij->ilen[row/bs] = 0;
1655: } /* end (diag == 0.0) */
1656: } else { /* (sizes[i] != bs) */
1657: #if defined (PETSC_USE_DEBUG)
1658: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659: #endif
1660: for (k=0; k<count; k++) {
1661: aa[0] = zero;
1662: aa += bs;
1663: }
1664: if (diag != 0.0) {
1665: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1666: }
1667: }
1668: }
1670: PetscFree(rows);
1671: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1672: return(0);
1673: }
1677: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678: {
1679: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1680: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1684: PetscInt ridx,cidx,bs2=a->bs2;
1685: PetscTruth roworiented=a->roworiented;
1686: MatScalar *ap,value,*aa=a->a,*bap;
1689: for (k=0; k<m; k++) { /* loop over added rows */
1690: row = im[k];
1691: brow = row/bs;
1692: if (row < 0) continue;
1693: #if defined(PETSC_USE_DEBUG)
1694: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1695: #endif
1696: rp = aj + ai[brow];
1697: ap = aa + bs2*ai[brow];
1698: rmax = imax[brow];
1699: nrow = ailen[brow];
1700: low = 0;
1701: high = nrow;
1702: for (l=0; l<n; l++) { /* loop over added columns */
1703: if (in[l] < 0) continue;
1704: #if defined(PETSC_USE_DEBUG)
1705: if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1706: #endif
1707: col = in[l]; bcol = col/bs;
1708: ridx = row % bs; cidx = col % bs;
1709: if (roworiented) {
1710: value = v[l + k*n];
1711: } else {
1712: value = v[k + l*m];
1713: }
1714: if (col <= lastcol) low = 0; else high = nrow;
1715: lastcol = col;
1716: while (high-low > 7) {
1717: t = (low+high)/2;
1718: if (rp[t] > bcol) high = t;
1719: else low = t;
1720: }
1721: for (i=low; i<high; i++) {
1722: if (rp[i] > bcol) break;
1723: if (rp[i] == bcol) {
1724: bap = ap + bs2*i + bs*cidx + ridx;
1725: if (is == ADD_VALUES) *bap += value;
1726: else *bap = value;
1727: goto noinsert1;
1728: }
1729: }
1730: if (nonew == 1) goto noinsert1;
1731: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1732: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1733: N = nrow++ - 1; high++;
1734: /* shift up all the later entries in this row */
1735: for (ii=N; ii>=i; ii--) {
1736: rp[ii+1] = rp[ii];
1737: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1738: }
1739: if (N>=i) {
1740: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1741: }
1742: rp[i] = bcol;
1743: ap[bs2*i + bs*cidx + ridx] = value;
1744: a->nz++;
1745: noinsert1:;
1746: low = i;
1747: }
1748: ailen[brow] = nrow;
1749: }
1750: A->same_nonzero = PETSC_FALSE;
1751: return(0);
1752: }
1757: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1758: {
1759: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1760: Mat outA;
1762: PetscTruth row_identity,col_identity;
1765: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1766: ISIdentity(row,&row_identity);
1767: ISIdentity(col,&col_identity);
1768: if (!row_identity || !col_identity) {
1769: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1770: }
1772: outA = inA;
1773: inA->factor = FACTOR_LU;
1775: MatMarkDiagonal_SeqBAIJ(inA);
1777: a->row = row;
1778: a->col = col;
1779: PetscObjectReference((PetscObject)row);
1780: PetscObjectReference((PetscObject)col);
1781:
1782: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1783: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1784: PetscLogObjectParent(inA,a->icol);
1785:
1786: /*
1787: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1788: for ILU(0) factorization with natural ordering
1789: */
1790: if (inA->rmap.bs < 8) {
1791: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1792: } else {
1793: if (!a->solve_work) {
1794: PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
1795: PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
1796: }
1797: }
1799: MatLUFactorNumeric(inA,info,&outA);
1801: return(0);
1802: }
1807: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1808: {
1809: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1810: PetscInt i,nz,nbs;
1813: nz = baij->maxnz/baij->bs2;
1814: nbs = baij->nbs;
1815: for (i=0; i<nz; i++) {
1816: baij->j[i] = indices[i];
1817: }
1818: baij->nz = nz;
1819: for (i=0; i<nbs; i++) {
1820: baij->ilen[i] = baij->imax[i];
1821: }
1823: return(0);
1824: }
1829: /*@
1830: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1831: in the matrix.
1833: Input Parameters:
1834: + mat - the SeqBAIJ matrix
1835: - indices - the column indices
1837: Level: advanced
1839: Notes:
1840: This can be called if you have precomputed the nonzero structure of the
1841: matrix and want to provide it to the matrix object to improve the performance
1842: of the MatSetValues() operation.
1844: You MUST have set the correct numbers of nonzeros per row in the call to
1845: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1847: MUST be called before any calls to MatSetValues();
1849: @*/
1850: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1851: {
1852: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1857: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1858: if (f) {
1859: (*f)(mat,indices);
1860: } else {
1861: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1862: }
1863: return(0);
1864: }
1868: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1869: {
1870: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1872: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
1873: PetscReal atmp;
1874: PetscScalar *x,zero = 0.0;
1875: MatScalar *aa;
1876: PetscInt ncols,brow,krow,kcol;
1879: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1880: bs = A->rmap.bs;
1881: aa = a->a;
1882: ai = a->i;
1883: aj = a->j;
1884: mbs = a->mbs;
1886: VecSet(v,zero);
1887: VecGetArray(v,&x);
1888: VecGetLocalSize(v,&n);
1889: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1890: for (i=0; i<mbs; i++) {
1891: ncols = ai[1] - ai[0]; ai++;
1892: brow = bs*i;
1893: for (j=0; j<ncols; j++){
1894: /* bcol = bs*(*aj); */
1895: for (kcol=0; kcol<bs; kcol++){
1896: for (krow=0; krow<bs; krow++){
1897: atmp = PetscAbsScalar(*aa); aa++;
1898: row = brow + krow; /* row index */
1899: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1900: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1901: }
1902: }
1903: aj++;
1904: }
1905: }
1906: VecRestoreArray(v,&x);
1907: return(0);
1908: }
1912: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1913: {
1917: /* If the two matrices have the same copy implementation, use fast copy. */
1918: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1919: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1920: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1922: if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1923: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1924: }
1925: PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
1926: } else {
1927: MatCopy_Basic(A,B,str);
1928: }
1929: return(0);
1930: }
1934: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1935: {
1939: MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
1940: return(0);
1941: }
1945: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1946: {
1947: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1949: *array = a->a;
1950: return(0);
1951: }
1955: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1956: {
1958: return(0);
1959: }
1961: #include petscblaslapack.h
1964: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1965: {
1966: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1968: PetscInt i,bs=Y->rmap.bs,j,bs2;
1969: PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz;
1972: if (str == SAME_NONZERO_PATTERN) {
1973: PetscScalar alpha = a;
1974: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1975: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1976: if (y->xtoy && y->XtoY != X) {
1977: PetscFree(y->xtoy);
1978: MatDestroy(y->XtoY);
1979: }
1980: if (!y->xtoy) { /* get xtoy */
1981: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1982: y->XtoY = X;
1983: }
1984: bs2 = bs*bs;
1985: for (i=0; i<x->nz; i++) {
1986: j = 0;
1987: while (j < bs2){
1988: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1989: j++;
1990: }
1991: }
1992: PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1993: } else {
1994: MatAXPY_Basic(Y,a,X,str);
1995: }
1996: return(0);
1997: }
2001: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2002: {
2003: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2004: PetscInt i,nz = a->bs2*a->i[a->mbs];
2005: PetscScalar *aa = a->a;
2008: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2009: return(0);
2010: }
2014: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2015: {
2016: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2017: PetscInt i,nz = a->bs2*a->i[a->mbs];
2018: PetscScalar *aa = a->a;
2021: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2022: return(0);
2023: }
2026: /* -------------------------------------------------------------------*/
2027: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2028: MatGetRow_SeqBAIJ,
2029: MatRestoreRow_SeqBAIJ,
2030: MatMult_SeqBAIJ_N,
2031: /* 4*/ MatMultAdd_SeqBAIJ_N,
2032: MatMultTranspose_SeqBAIJ,
2033: MatMultTransposeAdd_SeqBAIJ,
2034: MatSolve_SeqBAIJ_N,
2035: 0,
2036: 0,
2037: /*10*/ 0,
2038: MatLUFactor_SeqBAIJ,
2039: 0,
2040: 0,
2041: MatTranspose_SeqBAIJ,
2042: /*15*/ MatGetInfo_SeqBAIJ,
2043: MatEqual_SeqBAIJ,
2044: MatGetDiagonal_SeqBAIJ,
2045: MatDiagonalScale_SeqBAIJ,
2046: MatNorm_SeqBAIJ,
2047: /*20*/ 0,
2048: MatAssemblyEnd_SeqBAIJ,
2049: 0,
2050: MatSetOption_SeqBAIJ,
2051: MatZeroEntries_SeqBAIJ,
2052: /*25*/ MatZeroRows_SeqBAIJ,
2053: MatLUFactorSymbolic_SeqBAIJ,
2054: MatLUFactorNumeric_SeqBAIJ_N,
2055: MatCholeskyFactorSymbolic_SeqBAIJ,
2056: MatCholeskyFactorNumeric_SeqBAIJ_N,
2057: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2058: MatILUFactorSymbolic_SeqBAIJ,
2059: MatICCFactorSymbolic_SeqBAIJ,
2060: MatGetArray_SeqBAIJ,
2061: MatRestoreArray_SeqBAIJ,
2062: /*35*/ MatDuplicate_SeqBAIJ,
2063: 0,
2064: 0,
2065: MatILUFactor_SeqBAIJ,
2066: 0,
2067: /*40*/ MatAXPY_SeqBAIJ,
2068: MatGetSubMatrices_SeqBAIJ,
2069: MatIncreaseOverlap_SeqBAIJ,
2070: MatGetValues_SeqBAIJ,
2071: MatCopy_SeqBAIJ,
2072: /*45*/ 0,
2073: MatScale_SeqBAIJ,
2074: 0,
2075: 0,
2076: 0,
2077: /*50*/ 0,
2078: MatGetRowIJ_SeqBAIJ,
2079: MatRestoreRowIJ_SeqBAIJ,
2080: 0,
2081: 0,
2082: /*55*/ 0,
2083: 0,
2084: 0,
2085: 0,
2086: MatSetValuesBlocked_SeqBAIJ,
2087: /*60*/ MatGetSubMatrix_SeqBAIJ,
2088: MatDestroy_SeqBAIJ,
2089: MatView_SeqBAIJ,
2090: 0,
2091: 0,
2092: /*65*/ 0,
2093: 0,
2094: 0,
2095: 0,
2096: 0,
2097: /*70*/ MatGetRowMax_SeqBAIJ,
2098: MatConvert_Basic,
2099: 0,
2100: 0,
2101: 0,
2102: /*75*/ 0,
2103: 0,
2104: 0,
2105: 0,
2106: 0,
2107: /*80*/ 0,
2108: 0,
2109: 0,
2110: 0,
2111: MatLoad_SeqBAIJ,
2112: /*85*/ 0,
2113: 0,
2114: 0,
2115: 0,
2116: 0,
2117: /*90*/ 0,
2118: 0,
2119: 0,
2120: 0,
2121: 0,
2122: /*95*/ 0,
2123: 0,
2124: 0,
2125: 0,
2126: 0,
2127: /*100*/0,
2128: 0,
2129: 0,
2130: 0,
2131: 0,
2132: /*105*/0,
2133: MatRealPart_SeqBAIJ,
2134: MatImaginaryPart_SeqBAIJ
2135: };
2140: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2141: {
2142: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2143: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2147: if (aij->nonew != 1) {
2148: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2149: }
2151: /* allocate space for values if not already there */
2152: if (!aij->saved_values) {
2153: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2154: }
2156: /* copy values over */
2157: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2158: return(0);
2159: }
2165: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2166: {
2167: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2169: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2172: if (aij->nonew != 1) {
2173: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2174: }
2175: if (!aij->saved_values) {
2176: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2177: }
2179: /* copy values over */
2180: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2181: return(0);
2182: }
2193: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2194: {
2195: Mat_SeqBAIJ *b;
2197: PetscInt i,mbs,nbs,bs2,newbs = bs;
2198: PetscTruth flg,skipallocation = PETSC_FALSE;
2202: if (nz == MAT_SKIP_ALLOCATION) {
2203: skipallocation = PETSC_TRUE;
2204: nz = 0;
2205: }
2207: PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2208: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2209: PetscOptionsEnd();
2211: if (nnz && newbs != bs) {
2212: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2213: }
2214: bs = newbs;
2216: B->rmap.bs = B->cmap.bs = bs;
2217: PetscMapInitialize(B->comm,&B->rmap);
2218: PetscMapInitialize(B->comm,&B->cmap);
2220: B->preallocated = PETSC_TRUE;
2222: mbs = B->rmap.n/bs;
2223: nbs = B->cmap.n/bs;
2224: bs2 = bs*bs;
2226: if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2227: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2228: }
2230: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2231: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2232: if (nnz) {
2233: for (i=0; i<mbs; i++) {
2234: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2235: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2236: }
2237: }
2239: b = (Mat_SeqBAIJ*)B->data;
2240: PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2241: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2242: PetscOptionsEnd();
2244: B->ops->solve = MatSolve_SeqBAIJ_Update;
2245: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2246: if (!flg) {
2247: switch (bs) {
2248: case 1:
2249: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2250: B->ops->mult = MatMult_SeqBAIJ_1;
2251: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2252: break;
2253: case 2:
2254: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2255: B->ops->mult = MatMult_SeqBAIJ_2;
2256: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2257: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2258: break;
2259: case 3:
2260: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2261: B->ops->mult = MatMult_SeqBAIJ_3;
2262: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2263: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2264: break;
2265: case 4:
2266: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2267: B->ops->mult = MatMult_SeqBAIJ_4;
2268: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2269: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2270: break;
2271: case 5:
2272: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2273: B->ops->mult = MatMult_SeqBAIJ_5;
2274: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2275: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2276: break;
2277: case 6:
2278: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2279: B->ops->mult = MatMult_SeqBAIJ_6;
2280: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2281: break;
2282: case 7:
2283: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2284: B->ops->mult = MatMult_SeqBAIJ_7;
2285: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2286: break;
2287: default:
2288: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2289: B->ops->mult = MatMult_SeqBAIJ_N;
2290: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2291: break;
2292: }
2293: }
2294: B->rmap.bs = bs;
2295: b->mbs = mbs;
2296: b->nbs = nbs;
2297: if (!skipallocation) {
2298: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2299: /* b->ilen will count nonzeros in each block row so far. */
2300: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2301: if (!nnz) {
2302: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2303: else if (nz <= 0) nz = 1;
2304: for (i=0; i<mbs; i++) b->imax[i] = nz;
2305: nz = nz*mbs;
2306: } else {
2307: nz = 0;
2308: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2309: }
2311: /* allocate the matrix space */
2312: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2313: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2314: PetscMemzero(b->j,nz*sizeof(PetscInt));
2315: b->singlemalloc = PETSC_TRUE;
2317: b->i[0] = 0;
2318: for (i=1; i<mbs+1; i++) {
2319: b->i[i] = b->i[i-1] + b->imax[i-1];
2320: }
2321: b->free_a = PETSC_TRUE;
2322: b->free_ij = PETSC_TRUE;
2323: } else {
2324: b->free_a = PETSC_FALSE;
2325: b->free_ij = PETSC_FALSE;
2326: }
2328: B->rmap.bs = bs;
2329: b->bs2 = bs2;
2330: b->mbs = mbs;
2331: b->nz = 0;
2332: b->maxnz = nz*bs2;
2333: B->info.nz_unneeded = (PetscReal)b->maxnz;
2334: return(0);
2335: }
2338: /*MC
2339: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2340: block sparse compressed row format.
2342: Options Database Keys:
2343: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2345: Level: beginner
2347: .seealso: MatCreateSeqBAIJ()
2348: M*/
2353: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2354: {
2356: PetscMPIInt size;
2357: Mat_SeqBAIJ *b;
2360: MPI_Comm_size(B->comm,&size);
2361: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2363: PetscNew(Mat_SeqBAIJ,&b);
2364: B->data = (void*)b;
2365: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2366: B->factor = 0;
2367: B->mapping = 0;
2368: b->row = 0;
2369: b->col = 0;
2370: b->icol = 0;
2371: b->reallocs = 0;
2372: b->saved_values = 0;
2373: #if defined(PETSC_USE_MAT_SINGLE)
2374: b->setvalueslen = 0;
2375: b->setvaluescopy = PETSC_NULL;
2376: #endif
2378: b->sorted = PETSC_FALSE;
2379: b->roworiented = PETSC_TRUE;
2380: b->nonew = 0;
2381: b->diag = 0;
2382: b->solve_work = 0;
2383: b->mult_work = 0;
2384: B->spptr = 0;
2385: B->info.nz_unneeded = (PetscReal)b->maxnz;
2386: b->keepzeroedrows = PETSC_FALSE;
2387: b->xtoy = 0;
2388: b->XtoY = 0;
2389: b->compressedrow.use = PETSC_FALSE;
2390: b->compressedrow.nrows = 0;
2391: b->compressedrow.i = PETSC_NULL;
2392: b->compressedrow.rindex = PETSC_NULL;
2393: b->compressedrow.checked = PETSC_FALSE;
2394: B->same_nonzero = PETSC_FALSE;
2396: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2397: "MatInvertBlockDiagonal_SeqBAIJ",
2398: MatInvertBlockDiagonal_SeqBAIJ);
2399: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2400: "MatStoreValues_SeqBAIJ",
2401: MatStoreValues_SeqBAIJ);
2402: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2403: "MatRetrieveValues_SeqBAIJ",
2404: MatRetrieveValues_SeqBAIJ);
2405: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2406: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2407: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2408: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2409: "MatConvert_SeqBAIJ_SeqAIJ",
2410: MatConvert_SeqBAIJ_SeqAIJ);
2411: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2412: "MatConvert_SeqBAIJ_SeqSBAIJ",
2413: MatConvert_SeqBAIJ_SeqSBAIJ);
2414: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2415: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2416: MatSeqBAIJSetPreallocation_SeqBAIJ);
2417: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2418: return(0);
2419: }
2424: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2425: {
2426: Mat C;
2427: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2429: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2432: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2434: *B = 0;
2435: MatCreate(A->comm,&C);
2436: MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2437: MatSetType(C,A->type_name);
2438: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2439: c = (Mat_SeqBAIJ*)C->data;
2441: C->rmap.N = A->rmap.N;
2442: C->cmap.N = A->cmap.N;
2443: C->rmap.bs = A->rmap.bs;
2444: c->bs2 = a->bs2;
2445: c->mbs = a->mbs;
2446: c->nbs = a->nbs;
2448: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2449: for (i=0; i<mbs; i++) {
2450: c->imax[i] = a->imax[i];
2451: c->ilen[i] = a->ilen[i];
2452: }
2454: /* allocate the matrix space */
2455: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2456: c->singlemalloc = PETSC_TRUE;
2457: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2458: if (mbs > 0) {
2459: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2460: if (cpvalues == MAT_COPY_VALUES) {
2461: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2462: } else {
2463: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2464: }
2465: }
2466: c->sorted = a->sorted;
2467: c->roworiented = a->roworiented;
2468: c->nonew = a->nonew;
2470: if (a->diag) {
2471: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2472: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2473: for (i=0; i<mbs; i++) {
2474: c->diag[i] = a->diag[i];
2475: }
2476: } else c->diag = 0;
2477: c->nz = a->nz;
2478: c->maxnz = a->maxnz;
2479: c->solve_work = 0;
2480: c->mult_work = 0;
2481: c->free_a = PETSC_TRUE;
2482: c->free_ij = PETSC_TRUE;
2483: C->preallocated = PETSC_TRUE;
2484: C->assembled = PETSC_TRUE;
2486: c->compressedrow.use = a->compressedrow.use;
2487: c->compressedrow.nrows = a->compressedrow.nrows;
2488: c->compressedrow.checked = a->compressedrow.checked;
2489: if ( a->compressedrow.checked && a->compressedrow.use){
2490: i = a->compressedrow.nrows;
2491: PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2492: c->compressedrow.rindex = c->compressedrow.i + i + 1;
2493: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2494: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2495: } else {
2496: c->compressedrow.use = PETSC_FALSE;
2497: c->compressedrow.i = PETSC_NULL;
2498: c->compressedrow.rindex = PETSC_NULL;
2499: }
2500: C->same_nonzero = A->same_nonzero;
2501: *B = C;
2502: PetscFListDuplicate(A->qlist,&C->qlist);
2503: return(0);
2504: }
2508: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2509: {
2510: Mat_SeqBAIJ *a;
2511: Mat B;
2513: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2514: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2515: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2516: PetscInt *masked,nmask,tmp,bs2,ishift;
2517: PetscMPIInt size;
2518: int fd;
2519: PetscScalar *aa;
2520: MPI_Comm comm = ((PetscObject)viewer)->comm;
2523: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2524: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2525: PetscOptionsEnd();
2526: bs2 = bs*bs;
2528: MPI_Comm_size(comm,&size);
2529: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2530: PetscViewerBinaryGetDescriptor(viewer,&fd);
2531: PetscBinaryRead(fd,header,4,PETSC_INT);
2532: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2533: M = header[1]; N = header[2]; nz = header[3];
2535: if (header[3] < 0) {
2536: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2537: }
2539: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2541: /*
2542: This code adds extra rows to make sure the number of rows is
2543: divisible by the blocksize
2544: */
2545: mbs = M/bs;
2546: extra_rows = bs - M + bs*(mbs);
2547: if (extra_rows == bs) extra_rows = 0;
2548: else mbs++;
2549: if (extra_rows) {
2550: PetscInfo(0,"Padding loaded matrix to match blocksize\n");
2551: }
2553: /* read in row lengths */
2554: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2555: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2556: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2558: /* read in column indices */
2559: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2560: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2561: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2563: /* loop over row lengths determining block row lengths */
2564: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2565: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2566: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2567: PetscMemzero(mask,mbs*sizeof(PetscInt));
2568: masked = mask + mbs;
2569: rowcount = 0; nzcount = 0;
2570: for (i=0; i<mbs; i++) {
2571: nmask = 0;
2572: for (j=0; j<bs; j++) {
2573: kmax = rowlengths[rowcount];
2574: for (k=0; k<kmax; k++) {
2575: tmp = jj[nzcount++]/bs;
2576: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2577: }
2578: rowcount++;
2579: }
2580: browlengths[i] += nmask;
2581: /* zero out the mask elements we set */
2582: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2583: }
2585: /* create our matrix */
2586: MatCreate(comm,&B);
2587: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2588: MatSetType(B,type);
2589: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2590: a = (Mat_SeqBAIJ*)B->data;
2592: /* set matrix "i" values */
2593: a->i[0] = 0;
2594: for (i=1; i<= mbs; i++) {
2595: a->i[i] = a->i[i-1] + browlengths[i-1];
2596: a->ilen[i-1] = browlengths[i-1];
2597: }
2598: a->nz = 0;
2599: for (i=0; i<mbs; i++) a->nz += browlengths[i];
2601: /* read in nonzero values */
2602: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2603: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2604: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2606: /* set "a" and "j" values into matrix */
2607: nzcount = 0; jcount = 0;
2608: for (i=0; i<mbs; i++) {
2609: nzcountb = nzcount;
2610: nmask = 0;
2611: for (j=0; j<bs; j++) {
2612: kmax = rowlengths[i*bs+j];
2613: for (k=0; k<kmax; k++) {
2614: tmp = jj[nzcount++]/bs;
2615: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2616: }
2617: }
2618: /* sort the masked values */
2619: PetscSortInt(nmask,masked);
2621: /* set "j" values into matrix */
2622: maskcount = 1;
2623: for (j=0; j<nmask; j++) {
2624: a->j[jcount++] = masked[j];
2625: mask[masked[j]] = maskcount++;
2626: }
2627: /* set "a" values into matrix */
2628: ishift = bs2*a->i[i];
2629: for (j=0; j<bs; j++) {
2630: kmax = rowlengths[i*bs+j];
2631: for (k=0; k<kmax; k++) {
2632: tmp = jj[nzcountb]/bs ;
2633: block = mask[tmp] - 1;
2634: point = jj[nzcountb] - bs*tmp;
2635: idx = ishift + bs2*block + j + bs*point;
2636: a->a[idx] = (MatScalar)aa[nzcountb++];
2637: }
2638: }
2639: /* zero out the mask elements we set */
2640: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2641: }
2642: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2644: PetscFree(rowlengths);
2645: PetscFree(browlengths);
2646: PetscFree(aa);
2647: PetscFree(jj);
2648: PetscFree(mask);
2650: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2651: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2652: MatView_Private(B);
2654: *A = B;
2655: return(0);
2656: }
2660: /*@C
2661: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2662: compressed row) format. For good matrix assembly performance the
2663: user should preallocate the matrix storage by setting the parameter nz
2664: (or the array nnz). By setting these parameters accurately, performance
2665: during matrix assembly can be increased by more than a factor of 50.
2667: Collective on MPI_Comm
2669: Input Parameters:
2670: + comm - MPI communicator, set to PETSC_COMM_SELF
2671: . bs - size of block
2672: . m - number of rows
2673: . n - number of columns
2674: . nz - number of nonzero blocks per block row (same for all rows)
2675: - nnz - array containing the number of nonzero blocks in the various block rows
2676: (possibly different for each block row) or PETSC_NULL
2678: Output Parameter:
2679: . A - the matrix
2681: Options Database Keys:
2682: . -mat_no_unroll - uses code that does not unroll the loops in the
2683: block calculations (much slower)
2684: . -mat_block_size - size of the blocks to use
2686: Level: intermediate
2688: Notes:
2689: The number of rows and columns must be divisible by blocksize.
2691: If the nnz parameter is given then the nz parameter is ignored
2693: A nonzero block is any block that as 1 or more nonzeros in it
2695: The block AIJ format is fully compatible with standard Fortran 77
2696: storage. That is, the stored row and column indices can begin at
2697: either one (as in Fortran) or zero. See the users' manual for details.
2699: Specify the preallocated storage with either nz or nnz (not both).
2700: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2701: allocation. For additional details, see the users manual chapter on
2702: matrices.
2704: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2705: @*/
2706: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2707: {
2709:
2711: MatCreate(comm,A);
2712: MatSetSizes(*A,m,n,m,n);
2713: MatSetType(*A,MATSEQBAIJ);
2714: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2715: return(0);
2716: }
2720: /*@C
2721: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2722: per row in the matrix. For good matrix assembly performance the
2723: user should preallocate the matrix storage by setting the parameter nz
2724: (or the array nnz). By setting these parameters accurately, performance
2725: during matrix assembly can be increased by more than a factor of 50.
2727: Collective on MPI_Comm
2729: Input Parameters:
2730: + A - the matrix
2731: . bs - size of block
2732: . nz - number of block nonzeros per block row (same for all rows)
2733: - nnz - array containing the number of block nonzeros in the various block rows
2734: (possibly different for each block row) or PETSC_NULL
2736: Options Database Keys:
2737: . -mat_no_unroll - uses code that does not unroll the loops in the
2738: block calculations (much slower)
2739: . -mat_block_size - size of the blocks to use
2741: Level: intermediate
2743: Notes:
2744: If the nnz parameter is given then the nz parameter is ignored
2746: The block AIJ format is fully compatible with standard Fortran 77
2747: storage. That is, the stored row and column indices can begin at
2748: either one (as in Fortran) or zero. See the users' manual for details.
2750: Specify the preallocated storage with either nz or nnz (not both).
2751: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2752: allocation. For additional details, see the users manual chapter on
2753: matrices.
2755: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2756: @*/
2757: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2758: {
2759: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2762: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2763: if (f) {
2764: (*f)(B,bs,nz,nnz);
2765: }
2766: return(0);
2767: }
2771: /*@
2772: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2773: (upper triangular entries in CSR format) provided by the user.
2775: Collective on MPI_Comm
2777: Input Parameters:
2778: + comm - must be an MPI communicator of size 1
2779: . bs - size of block
2780: . m - number of rows
2781: . n - number of columns
2782: . i - row indices
2783: . j - column indices
2784: - a - matrix values
2786: Output Parameter:
2787: . mat - the matrix
2789: Level: intermediate
2791: Notes:
2792: The i, j, and a arrays are not copied by this routine, the user must free these arrays
2793: once the matrix is destroyed
2795: You cannot set new nonzero locations into this matrix, that will generate an error.
2797: The i and j indices are 0 based
2799: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2801: @*/
2802: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2803: {
2805: PetscInt ii;
2806: Mat_SeqBAIJ *baij;
2809: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2810: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2811:
2812: MatCreate(comm,mat);
2813: MatSetSizes(*mat,m,n,m,n);
2814: MatSetType(*mat,MATSEQBAIJ);
2815: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2816: baij = (Mat_SeqBAIJ*)(*mat)->data;
2817: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
2819: baij->i = i;
2820: baij->j = j;
2821: baij->a = a;
2822: baij->singlemalloc = PETSC_FALSE;
2823: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2824: baij->free_a = PETSC_FALSE;
2825: baij->free_ij = PETSC_FALSE;
2827: for (ii=0; ii<m; ii++) {
2828: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2829: #if defined(PETSC_USE_DEBUG)
2830: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2831: #endif
2832: }
2833: #if defined(PETSC_USE_DEBUG)
2834: for (ii=0; ii<baij->i[m]; ii++) {
2835: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2836: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2837: }
2838: #endif
2840: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2841: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2842: return(0);
2843: }