Actual source code: mpibaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/baij/mpi/mpibaij.h"
5: EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6: EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7: EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8: EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9: EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10: EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12: EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13: EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14: EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16: /* UGLY, ugly, ugly
17: When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18: not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19: inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20: converts the entries into single precision and then calls ..._MatScalar() to put them
21: into the single precision data structures.
22: */
23: #if defined(PETSC_USE_MAT_SINGLE)
24: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25: EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26: EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27: EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28: EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29: #else
30: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
31: #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
32: #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
33: #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT
34: #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
35: #endif
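/*
   Illustrative sketch, not part of this source file: under PETSC_USE_MAT_SINGLE
   the wrappers below copy the caller's PetscScalar entries into a cached
   MatScalar buffer before forwarding to the ..._MatScalar() routines.  Assuming
   PetscScalar is double and MatScalar is float, the narrowing step amounts to
   the following (narrow_values is a hypothetical helper, not used by PETSc):
*/
static void narrow_values(const double *v,float *vsingle,int N)
{
  int i;
  for (i=0; i<N; i++) vsingle[i] = (float)v[i];  /* precision is deliberately dropped here */
}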
39: PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40: {
41: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
43: PetscInt i;
44: PetscScalar *va,*vb;
45: Vec vtmp;
48:
49: MatGetRowMax(a->A,v);
50: VecGetArray(v,&va);
52: VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);
53: MatGetRowMax(a->B,vtmp);
54: VecGetArray(vtmp,&vb);
56: for (i=0; i<A->rmap.n; i++){
57: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58: }
60: VecRestoreArray(v,&va);
61: VecRestoreArray(vtmp,&vb);
62: VecDestroy(vtmp);
63:
64: return(0);
65: }
70: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71: {
72: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
76: MatStoreValues(aij->A);
77: MatStoreValues(aij->B);
78: return(0);
79: }
85: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86: {
87: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
91: MatRetrieveValues(aij->A);
92: MatRetrieveValues(aij->B);
93: return(0);
94: }
97: /*
98: Local utility routine that creates a mapping from the global column
99: number to the local number in the off-diagonal part of the local
100: storage of the matrix. This is done in a non-scalable way since the
101: length of colmap equals the number of block columns in the global matrix.
102: */
105: PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106: {
107: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
108: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110: PetscInt nbs = B->nbs,i,bs=mat->rmap.bs;
113: #if defined (PETSC_USE_CTABLE)
114: PetscTableCreate(baij->nbs,&baij->colmap);
115: for (i=0; i<nbs; i++){
116: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
117: }
118: #else
119: PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);
120: PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
121: PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
122: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123: #endif
124: return(0);
125: }
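/*
   Usage sketch (illustrative only; colmap_lookup_demo is a hypothetical helper,
   not part of PETSc): how the non-CTABLE colmap built above is consulted when an
   entry lands in the off-diagonal block B, mirroring the lookups in
   MatSetValues_MPIBAIJ_MatScalar() below.  Entries are stored shifted by +1 so
   that 0 can mean "this block column is not present locally".
*/
static int colmap_lookup_demo(const int *colmap,int bs,int gcol)
{
  int lcol = colmap[gcol/bs] - 1;   /* 1-based offset of the block's first column -> 0-based; -1 if absent */
  if (lcol < 0) return -1;          /* absent: the caller disassembles the matrix or raises an error */
  return lcol + gcol%bs;            /* add the point offset within the block */
}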
127: #define CHUNKSIZE 10
129: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130: { \
131: \
132: brow = row/bs; \
133: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134: rmax = aimax[brow]; nrow = ailen[brow]; \
135: bcol = col/bs; \
136: ridx = row % bs; cidx = col % bs; \
137: low = 0; high = nrow; \
138: while (high-low > 3) { \
139: t = (low+high)/2; \
140: if (rp[t] > bcol) high = t; \
141: else low = t; \
142: } \
143: for (_i=low; _i<high; _i++) { \
144: if (rp[_i] > bcol) break; \
145: if (rp[_i] == bcol) { \
146: bap = ap + bs2*_i + bs*cidx + ridx; \
147: if (addv == ADD_VALUES) *bap += value; \
148: else *bap = value; \
149: goto a_noinsert; \
150: } \
151: } \
152: if (a->nonew == 1) goto a_noinsert; \
153: if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
154: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
155: N = nrow++ - 1; \
156: /* shift up all the later entries in this row */ \
157: for (ii=N; ii>=_i; ii--) { \
158: rp[ii+1] = rp[ii]; \
159: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
160: } \
161: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
162: rp[_i] = bcol; \
163: ap[bs2*_i + bs*cidx + ridx] = value; \
164: a_noinsert:; \
165: ailen[brow] = nrow; \
166: }
168: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
169: { \
170: brow = row/bs; \
171: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
172: rmax = bimax[brow]; nrow = bilen[brow]; \
173: bcol = col/bs; \
174: ridx = row % bs; cidx = col % bs; \
175: low = 0; high = nrow; \
176: while (high-low > 3) { \
177: t = (low+high)/2; \
178: if (rp[t] > bcol) high = t; \
179: else low = t; \
180: } \
181: for (_i=low; _i<high; _i++) { \
182: if (rp[_i] > bcol) break; \
183: if (rp[_i] == bcol) { \
184: bap = ap + bs2*_i + bs*cidx + ridx; \
185: if (addv == ADD_VALUES) *bap += value; \
186: else *bap = value; \
187: goto b_noinsert; \
188: } \
189: } \
190: if (b->nonew == 1) goto b_noinsert; \
191: if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
192: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
193: CHKMEMQ;\
194: N = nrow++ - 1; \
195: /* shift up all the later entries in this row */ \
196: for (ii=N; ii>=_i; ii--) { \
197: rp[ii+1] = rp[ii]; \
198: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
199: } \
200: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
201: rp[_i] = bcol; \
202: ap[bs2*_i + bs*cidx + ridx] = value; \
203: b_noinsert:; \
204: bilen[brow] = nrow; \
205: }
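/*
   Sketch of the column search shared by the two macros above (illustrative only;
   find_block_col_demo is a hypothetical stand-in).  The block-column indices
   rp[0..nrow-1] of a row are kept sorted, so a short bisection narrows the
   window to a handful of entries and a linear scan finishes the job.  It returns
   the position of bcol if present, or -(insert_position+1) otherwise.
*/
static int find_block_col_demo(const int *rp,int nrow,int bcol)
{
  int low = 0,high = nrow,t,i;
  while (high-low > 3) {           /* bisection with the same stopping rule as the macros */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > bcol) break;       /* passed it: bcol would have to be inserted at position i */
    if (rp[i] == bcol) return i;   /* found the existing block */
  }
  return -(i+1);
}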
207: #if defined(PETSC_USE_MAT_SINGLE)
210: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
211: {
212: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
214: PetscInt i,N = m*n;
215: MatScalar *vsingle;
218: if (N > b->setvalueslen) {
219: PetscFree(b->setvaluescopy);
220: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
221: b->setvalueslen = N;
222: }
223: vsingle = b->setvaluescopy;
225: for (i=0; i<N; i++) {
226: vsingle[i] = v[i];
227: }
228: MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
229: return(0);
230: }
234: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235: {
236: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
238: PetscInt i,N = m*n*b->bs2;
239: MatScalar *vsingle;
242: if (N > b->setvalueslen) {
243: PetscFree(b->setvaluescopy);
244: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
245: b->setvalueslen = N;
246: }
247: vsingle = b->setvaluescopy;
248: for (i=0; i<N; i++) {
249: vsingle[i] = v[i];
250: }
251: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
252: return(0);
253: }
257: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
258: {
259: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261: PetscInt i,N = m*n;
262: MatScalar *vsingle;
265: if (N > b->setvalueslen) {
266: PetscFree(b->setvaluescopy);
267: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
268: b->setvalueslen = N;
269: }
270: vsingle = b->setvaluescopy;
271: for (i=0; i<N; i++) {
272: vsingle[i] = v[i];
273: }
274: MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
275: return(0);
276: }
280: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
281: {
282: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
284: PetscInt i,N = m*n*b->bs2;
285: MatScalar *vsingle;
288: if (N > b->setvalueslen) {
289: PetscFree(b->setvaluescopy);
290: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
291: b->setvalueslen = N;
292: }
293: vsingle = b->setvaluescopy;
294: for (i=0; i<N; i++) {
295: vsingle[i] = v[i];
296: }
297: MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
298: return(0);
299: }
300: #endif
304: PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
305: {
306: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
307: MatScalar value;
308: PetscTruth roworiented = baij->roworiented;
310: PetscInt i,j,row,col;
311: PetscInt rstart_orig=mat->rmap.rstart;
312: PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
313: PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
315: /* Some Variables required in the macro */
316: Mat A = baij->A;
317: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
318: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
319: MatScalar *aa=a->a;
321: Mat B = baij->B;
322: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
323: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
324: MatScalar *ba=b->a;
326: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
327: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
328: MatScalar *ap,*bap;
331: for (i=0; i<m; i++) {
332: if (im[i] < 0) continue;
333: #if defined(PETSC_USE_DEBUG)
334: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
335: #endif
336: if (im[i] >= rstart_orig && im[i] < rend_orig) {
337: row = im[i] - rstart_orig;
338: for (j=0; j<n; j++) {
339: if (in[j] >= cstart_orig && in[j] < cend_orig){
340: col = in[j] - cstart_orig;
341: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
342: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
343: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
344: } else if (in[j] < 0) continue;
345: #if defined(PETSC_USE_DEBUG)
346: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
347: #endif
348: else {
349: if (mat->was_assembled) {
350: if (!baij->colmap) {
351: CreateColmap_MPIBAIJ_Private(mat);
352: }
353: #if defined (PETSC_USE_CTABLE)
354: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
355: col = col - 1;
356: #else
357: col = baij->colmap[in[j]/bs] - 1;
358: #endif
359: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
360: DisAssemble_MPIBAIJ(mat);
361: col = in[j];
362: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
363: B = baij->B;
364: b = (Mat_SeqBAIJ*)(B)->data;
365: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
366: ba=b->a;
367: } else col += in[j]%bs;
368: } else col = in[j];
369: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
370: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
371: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
372: }
373: }
374: } else {
375: if (!baij->donotstash) {
376: if (roworiented) {
377: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
378: } else {
379: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
380: }
381: }
382: }
383: }
384: return(0);
385: }
389: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
390: {
391: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
392: const MatScalar *value;
393: MatScalar *barray=baij->barray;
394: PetscTruth roworiented = baij->roworiented;
395: PetscErrorCode ierr;
396: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
397: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
398: PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
399:
401: if(!barray) {
402: PetscMalloc(bs2*sizeof(MatScalar),&barray);
403: baij->barray = barray;
404: }
406: if (roworiented) {
407: stepval = (n-1)*bs;
408: } else {
409: stepval = (m-1)*bs;
410: }
411: for (i=0; i<m; i++) {
412: if (im[i] < 0) continue;
413: #if defined(PETSC_USE_DEBUG)
414: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
415: #endif
416: if (im[i] >= rstart && im[i] < rend) {
417: row = im[i] - rstart;
418: for (j=0; j<n; j++) {
419: /* If only one block column (row-oriented) or one block row (column-oriented) is being inserted, no copy is required */
420: if ((roworiented) && (n == 1)) {
421: barray = (MatScalar*)v + i*bs2;
422: } else if((!roworiented) && (m == 1)) {
423: barray = (MatScalar*)v + j*bs2;
424: } else { /* Here a copy is required */
425: if (roworiented) {
426: value = v + i*(stepval+bs)*bs + j*bs;
427: } else {
428: value = v + j*(stepval+bs)*bs + i*bs;
429: }
430: for (ii=0; ii<bs; ii++,value+=stepval) {
431: for (jj=0; jj<bs; jj++) {
432: *barray++ = *value++;
433: }
434: }
435: barray -=bs2;
436: }
437:
438: if (in[j] >= cstart && in[j] < cend){
439: col = in[j] - cstart;
440: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);
441: }
442: else if (in[j] < 0) continue;
443: #if defined(PETSC_USE_DEBUG)
444: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
445: #endif
446: else {
447: if (mat->was_assembled) {
448: if (!baij->colmap) {
449: CreateColmap_MPIBAIJ_Private(mat);
450: }
452: #if defined(PETSC_USE_DEBUG)
453: #if defined (PETSC_USE_CTABLE)
454: { PetscInt data;
455: PetscTableFind(baij->colmap,in[j]+1,&data);
456: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
457: }
458: #else
459: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
460: #endif
461: #endif
462: #if defined (PETSC_USE_CTABLE)
463: PetscTableFind(baij->colmap,in[j]+1,&col);
464: col = (col - 1)/bs;
465: #else
466: col = (baij->colmap[in[j]] - 1)/bs;
467: #endif
468: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
469: DisAssemble_MPIBAIJ(mat);
470: col = in[j];
471: }
472: }
473: else col = in[j];
474: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);
475: }
476: }
477: } else {
478: if (!baij->donotstash) {
479: if (roworiented) {
480: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
481: } else {
482: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
483: }
484: }
485: }
486: }
487: return(0);
488: }
490: #define HASH_KEY 0.6180339887
491: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
492: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
493: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
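/*
   The HASH macro above is multiplicative hashing by the fractional part of the
   golden ratio: h = floor(size * frac(key*0.6180339887)).  A standalone sketch
   (hash_demo is a hypothetical helper, not used by PETSc):
*/
static int hash_demo(int size,int key)
{
  double tmp = key*0.6180339887;
  return (int)(size*(tmp - (int)tmp));   /* same arithmetic as HASH(size,key,tmp) */
}
/* e.g. hash_demo(11,7): 7*0.618... = 4.326..., frac = 0.326..., 11*0.326... = 3.58... -> 3 */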
496: PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
497: {
498: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
499: PetscTruth roworiented = baij->roworiented;
501: PetscInt i,j,row,col;
502: PetscInt rstart_orig=mat->rmap.rstart;
503: PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
504: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
505: PetscReal tmp;
506: MatScalar **HD = baij->hd,value;
507: #if defined(PETSC_USE_DEBUG)
508: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
509: #endif
513: for (i=0; i<m; i++) {
514: #if defined(PETSC_USE_DEBUG)
515: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
516: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
517: #endif
518: row = im[i];
519: if (row >= rstart_orig && row < rend_orig) {
520: for (j=0; j<n; j++) {
521: col = in[j];
522: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
523: /* Look up into the Hash Table */
524: key = (row/bs)*Nbs+(col/bs)+1;
525: h1 = HASH(size,key,tmp);
527:
528: idx = h1;
529: #if defined(PETSC_USE_DEBUG)
530: insert_ct++;
531: total_ct++;
532: if (HT[idx] != key) {
533: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
534: if (idx == size) {
535: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
536: if (idx == h1) {
537: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
538: }
539: }
540: }
541: #else
542: if (HT[idx] != key) {
543: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
544: if (idx == size) {
545: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
546: if (idx == h1) {
547: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
548: }
549: }
550: }
551: #endif
552: /* A HASH table entry is found, so insert the values at the correct address */
553: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
554: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
555: }
556: } else {
557: if (!baij->donotstash) {
558: if (roworiented) {
559: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
560: } else {
561: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
562: }
563: }
564: }
565: }
566: #if defined(PETSC_USE_DEBUG)
567: baij->ht_total_ct = total_ct;
568: baij->ht_insert_ct = insert_ct;
569: #endif
570: return(0);
571: }
575: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
576: {
577: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
578: PetscTruth roworiented = baij->roworiented;
579: PetscErrorCode ierr;
580: PetscInt i,j,ii,jj,row,col;
581: PetscInt rstart=baij->rstartbs;
582: PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
583: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
584: PetscReal tmp;
585: MatScalar **HD = baij->hd,*baij_a;
586: const MatScalar *v_t,*value;
587: #if defined(PETSC_USE_DEBUG)
588: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
589: #endif
590:
593: if (roworiented) {
594: stepval = (n-1)*bs;
595: } else {
596: stepval = (m-1)*bs;
597: }
598: for (i=0; i<m; i++) {
599: #if defined(PETSC_USE_DEBUG)
600: if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
601: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
602: #endif
603: row = im[i];
604: v_t = v + i*bs2;
605: if (row >= rstart && row < rend) {
606: for (j=0; j<n; j++) {
607: col = in[j];
609: /* Look up into the Hash Table */
610: key = row*Nbs+col+1;
611: h1 = HASH(size,key,tmp);
612:
613: idx = h1;
614: #if defined(PETSC_USE_DEBUG)
615: total_ct++;
616: insert_ct++;
617: if (HT[idx] != key) {
618: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
619: if (idx == size) {
620: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
621: if (idx == h1) {
622: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623: }
624: }
625: }
626: #else
627: if (HT[idx] != key) {
628: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
629: if (idx == size) {
630: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
631: if (idx == h1) {
632: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
633: }
634: }
635: }
636: #endif
637: baij_a = HD[idx];
638: if (roworiented) {
639: /*value = v + i*(stepval+bs)*bs + j*bs;*/
640: /* value = v + (i*(stepval+bs)+j)*bs; */
641: value = v_t;
642: v_t += bs;
643: if (addv == ADD_VALUES) {
644: for (ii=0; ii<bs; ii++,value+=stepval) {
645: for (jj=ii; jj<bs2; jj+=bs) {
646: baij_a[jj] += *value++;
647: }
648: }
649: } else {
650: for (ii=0; ii<bs; ii++,value+=stepval) {
651: for (jj=ii; jj<bs2; jj+=bs) {
652: baij_a[jj] = *value++;
653: }
654: }
655: }
656: } else {
657: value = v + j*(stepval+bs)*bs + i*bs;
658: if (addv == ADD_VALUES) {
659: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
660: for (jj=0; jj<bs; jj++) {
661: baij_a[jj] += *value++;
662: }
663: }
664: } else {
665: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
666: for (jj=0; jj<bs; jj++) {
667: baij_a[jj] = *value++;
668: }
669: }
670: }
671: }
672: }
673: } else {
674: if (!baij->donotstash) {
675: if (roworiented) {
676: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
677: } else {
678: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
679: }
680: }
681: }
682: }
683: #if defined(PETSC_USE_DEBUG)
684: baij->ht_total_ct = total_ct;
685: baij->ht_insert_ct = insert_ct;
686: #endif
687: return(0);
688: }
692: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
693: {
694: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
696: PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
697: PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
700: for (i=0; i<m; i++) {
701: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
702: if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
703: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
704: row = idxm[i] - bsrstart;
705: for (j=0; j<n; j++) {
706: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
707: if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
708: if (idxn[j] >= bscstart && idxn[j] < bscend){
709: col = idxn[j] - bscstart;
710: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
711: } else {
712: if (!baij->colmap) {
713: CreateColmap_MPIBAIJ_Private(mat);
714: }
715: #if defined (PETSC_USE_CTABLE)
716: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
717: data --;
718: #else
719: data = baij->colmap[idxn[j]/bs]-1;
720: #endif
721: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
722: else {
723: col = data + idxn[j]%bs;
724: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
725: }
726: }
727: }
728: } else {
729: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
730: }
731: }
732: return(0);
733: }
737: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
738: {
739: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
740: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
743: PetscReal sum = 0.0;
744: MatScalar *v;
747: if (baij->size == 1) {
748: MatNorm(baij->A,type,nrm);
749: } else {
750: if (type == NORM_FROBENIUS) {
751: v = amat->a;
752: nz = amat->nz*bs2;
753: for (i=0; i<nz; i++) {
754: #if defined(PETSC_USE_COMPLEX)
755: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
756: #else
757: sum += (*v)*(*v); v++;
758: #endif
759: }
760: v = bmat->a;
761: nz = bmat->nz*bs2;
762: for (i=0; i<nz; i++) {
763: #if defined(PETSC_USE_COMPLEX)
764: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
765: #else
766: sum += (*v)*(*v); v++;
767: #endif
768: }
769: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);
770: *nrm = sqrt(*nrm);
771: } else if (type == NORM_1) { /* max column sum */
772: PetscReal *tmp,*tmp2;
773: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
774: PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);
775: tmp2 = tmp + mat->cmap.N;
776: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
777: v = amat->a; jj = amat->j;
778: for (i=0; i<amat->nz; i++) {
779: for (j=0; j<bs; j++){
780: col = bs*(cstart + *jj) + j; /* column index */
781: for (row=0; row<bs; row++){
782: tmp[col] += PetscAbsScalar(*v); v++;
783: }
784: }
785: jj++;
786: }
787: v = bmat->a; jj = bmat->j;
788: for (i=0; i<bmat->nz; i++) {
789: for (j=0; j<bs; j++){
790: col = bs*garray[*jj] + j;
791: for (row=0; row<bs; row++){
792: tmp[col] += PetscAbsScalar(*v); v++;
793: }
794: }
795: jj++;
796: }
797: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
798: *nrm = 0.0;
799: for (j=0; j<mat->cmap.N; j++) {
800: if (tmp2[j] > *nrm) *nrm = tmp2[j];
801: }
802: PetscFree(tmp);
803: } else if (type == NORM_INFINITY) { /* max row sum */
804: PetscReal *sums;
805: PetscMalloc(bs*sizeof(PetscReal),&sums);
806: sum = 0.0;
807: for (j=0; j<amat->mbs; j++) {
808: for (row=0; row<bs; row++) sums[row] = 0.0;
809: v = amat->a + bs2*amat->i[j];
810: nz = amat->i[j+1]-amat->i[j];
811: for (i=0; i<nz; i++) {
812: for (col=0; col<bs; col++){
813: for (row=0; row<bs; row++){
814: sums[row] += PetscAbsScalar(*v); v++;
815: }
816: }
817: }
818: v = bmat->a + bs2*bmat->i[j];
819: nz = bmat->i[j+1]-bmat->i[j];
820: for (i=0; i<nz; i++) {
821: for (col=0; col<bs; col++){
822: for (row=0; row<bs; row++){
823: sums[row] += PetscAbsScalar(*v); v++;
824: }
825: }
826: }
827: for (row=0; row<bs; row++){
828: if (sums[row] > sum) sum = sums[row];
829: }
830: }
831: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);
832: PetscFree(sums);
833: } else {
834: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835: }
836: }
837: return(0);
838: }
840: /*
841: Creates the hash table and fills in its entries.
842: This table is created only once.
843: If new entries need to be added to the matrix
844: then the hash table has to be destroyed and
845: recreated.
846: */
849: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850: {
851: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
852: Mat A = baij->A,B=baij->B;
853: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
854: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856: PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs;
857: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858: PetscInt *HT,key;
859: MatScalar **HD;
860: PetscReal tmp;
861: #if defined(PETSC_USE_INFO)
862: PetscInt ct=0,max=0;
863: #endif
866: baij->ht_size=(PetscInt)(factor*nz);
867: size = baij->ht_size;
869: if (baij->ht) {
870: return(0);
871: }
872:
873: /* Allocate Memory for Hash Table */
874: PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);
875: baij->ht = (PetscInt*)(baij->hd + size);
876: HD = baij->hd;
877: HT = baij->ht;
880: PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));
881:
883: /* Loop Over A */
884: for (i=0; i<a->mbs; i++) {
885: for (j=ai[i]; j<ai[i+1]; j++) {
886: row = i+rstart;
887: col = aj[j]+cstart;
888:
889: key = row*Nbs + col + 1;
890: h1 = HASH(size,key,tmp);
891: for (k=0; k<size; k++){
892: if (!HT[(h1+k)%size]) {
893: HT[(h1+k)%size] = key;
894: HD[(h1+k)%size] = a->a + j*bs2;
895: break;
896: #if defined(PETSC_USE_INFO)
897: } else {
898: ct++;
899: #endif
900: }
901: }
902: #if defined(PETSC_USE_INFO)
903: if (k> max) max = k;
904: #endif
905: }
906: }
907: /* Loop Over B */
908: for (i=0; i<b->mbs; i++) {
909: for (j=bi[i]; j<bi[i+1]; j++) {
910: row = i+rstart;
911: col = garray[bj[j]];
912: key = row*Nbs + col + 1;
913: h1 = HASH(size,key,tmp);
914: for (k=0; k<size; k++){
915: if (!HT[(h1+k)%size]) {
916: HT[(h1+k)%size] = key;
917: HD[(h1+k)%size] = b->a + j*bs2;
918: break;
919: #if defined(PETSC_USE_INFO)
920: } else {
921: ct++;
922: #endif
923: }
924: }
925: #if defined(PETSC_USE_INFO)
926: if (k> max) max = k;
927: #endif
928: }
929: }
930:
931: /* Print Summary */
932: #if defined(PETSC_USE_INFO)
933: for (i=0,j=0; i<size; i++) {
934: if (HT[i]) {j++;}
935: }
936: PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
937: #endif
938: return(0);
939: }
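/*
   Both the insertion loops above and the lookups in MatSetValues_MPIBAIJ_HT_MatScalar()
   and MatSetValuesBlocked_MPIBAIJ_HT_MatScalar() resolve collisions by open
   addressing with linear probing: starting from h1 = HASH(size,key,tmp) the table
   is scanned circularly until the key (or, when inserting, an empty slot) is found.
   A minimal lookup sketch (probe_find_demo is a hypothetical helper):
*/
static int probe_find_demo(const int *HT,int size,int h1,int key)
{
  int k,idx;
  for (k=0; k<size; k++) {
    idx = (h1+k)%size;             /* wrap around the end of the table */
    if (HT[idx] == key) return idx;
  }
  return -1;                       /* not present: the setvalues routines raise an error */
}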
943: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944: {
945: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
947: PetscInt nstash,reallocs;
948: InsertMode addv;
951: if (baij->donotstash) {
952: return(0);
953: }
955: /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode; the bitwise OR over all ranks equals (ADD_VALUES|INSERT_VALUES) only if both modes were used */
956: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
957: if (addv == (ADD_VALUES|INSERT_VALUES)) {
958: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
959: }
960: mat->insertmode = addv; /* in case this processor had no cache */
962: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
963: MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);
964: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
965: PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
966: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
967: PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
968: return(0);
969: }
973: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974: {
975: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
976: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data;
978: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
979: PetscInt *row,*col,other_disassembled;
980: PetscTruth r1,r2,r3;
981: MatScalar *val;
982: InsertMode addv = mat->insertmode;
983: PetscMPIInt n;
985: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
987: if (!baij->donotstash) {
988: while (1) {
989: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
990: if (!flg) break;
992: for (i=0; i<n;) {
993: /* Now identify the consecutive vals belonging to the same row */
994: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995: if (j < n) ncols = j-i;
996: else ncols = n-i;
997: /* Now assemble all these values with a single function call */
998: MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
999: i = j;
1000: }
1001: }
1002: MatStashScatterEnd_Private(&mat->stash);
1003: /* Now process the block-stash. Since the values are stashed column-oriented,
1004: set the roworiented flag to column oriented, and after MatSetValues()
1005: restore the original flags */
1006: r1 = baij->roworiented;
1007: r2 = a->roworiented;
1008: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
1009: baij->roworiented = PETSC_FALSE;
1010: a->roworiented = PETSC_FALSE;
1011: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
1012: while (1) {
1013: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
1014: if (!flg) break;
1015:
1016: for (i=0; i<n;) {
1017: /* Now identify the consecutive vals belonging to the same row */
1018: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019: if (j < n) ncols = j-i;
1020: else ncols = n-i;
1021: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
1022: i = j;
1023: }
1024: }
1025: MatStashScatterEnd_Private(&mat->bstash);
1026: baij->roworiented = r1;
1027: a->roworiented = r2;
1028: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
1029: }
1030:
1031: MatAssemblyBegin(baij->A,mode);
1032: MatAssemblyEnd(baij->A,mode);
1034: /* determine if any processor has disassembled, if so we must
1035: also disassemble ourselves, in order that we may reassemble. */
1036: /*
1037: if the nonzero structure of submatrix B cannot change then we know that
1038: no processor disassembled and we can skip this; the MPI_PROD of was_assembled is zero iff some process has disassembled
1039: */
1040: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
1041: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
1042: if (mat->was_assembled && !other_disassembled) {
1043: DisAssemble_MPIBAIJ(mat);
1044: }
1045: }
1047: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048: MatSetUpMultiply_MPIBAIJ(mat);
1049: }
1050: ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1051: MatAssemblyBegin(baij->B,mode);
1052: MatAssemblyEnd(baij->B,mode);
1053:
1054: #if defined(PETSC_USE_INFO)
1055: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1056: PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1057: baij->ht_total_ct = 0;
1058: baij->ht_insert_ct = 0;
1059: }
1060: #endif
1061: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1062: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
1063: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
1064: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1065: }
1067: PetscFree(baij->rowvalues);
1068: baij->rowvalues = 0;
1069: return(0);
1070: }
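/*
   Sketch of the run-length grouping used twice in MatAssemblyEnd_MPIBAIJ() above
   (group_rows_demo is a hypothetical helper): the stash delivers its (row,col,val)
   triples so that entries for the same row are consecutive, and each run is
   assembled with a single MatSetValues-style call.
*/
static int group_rows_demo(const int *row,int n,int i)
{
  int j,rstart = row[i];
  for (j=i; j<n; j++) { if (row[j] != rstart) break; }
  return j - i;                    /* number of consecutive entries sharing row[i] */
}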
1074: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1075: {
1076: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1077: PetscErrorCode ierr;
1078: PetscMPIInt size = baij->size,rank = baij->rank;
1079: PetscInt bs = mat->rmap.bs;
1080: PetscTruth iascii,isdraw;
1081: PetscViewer sviewer;
1082: PetscViewerFormat format;
1085: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1086: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1087: if (iascii) {
1088: PetscViewerGetFormat(viewer,&format);
1089: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1090: MatInfo info;
1091: MPI_Comm_rank(mat->comm,&rank);
1092: MatGetInfo(mat,MAT_LOCAL,&info);
1093: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1094: rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1095: mat->rmap.bs,(PetscInt)info.memory);
1096: MatGetInfo(baij->A,MAT_LOCAL,&info);
1097: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
1098: MatGetInfo(baij->B,MAT_LOCAL,&info);
1099: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
1100: PetscViewerFlush(viewer);
1101: VecScatterView(baij->Mvctx,viewer);
1102: return(0);
1103: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1104: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1105: return(0);
1106: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1107: return(0);
1108: }
1109: }
1111: if (isdraw) {
1112: PetscDraw draw;
1113: PetscTruth isnull;
1114: PetscViewerDrawGetDraw(viewer,0,&draw);
1115: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1116: }
1118: if (size == 1) {
1119: PetscObjectSetName((PetscObject)baij->A,mat->name);
1120: MatView(baij->A,viewer);
1121: } else {
1122: /* assemble the entire matrix onto first processor. */
1123: Mat A;
1124: Mat_SeqBAIJ *Aloc;
1125: PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1126: MatScalar *a;
1128: /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1129: /* Perhaps this should be the type of mat? */
1130: MatCreate(mat->comm,&A);
1131: if (!rank) {
1132: MatSetSizes(A,M,N,M,N);
1133: } else {
1134: MatSetSizes(A,0,0,M,N);
1135: }
1136: MatSetType(A,MATMPIBAIJ);
1137: MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);
1138: PetscLogObjectParent(mat,A);
1140: /* copy over the A part */
1141: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1142: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1145: for (i=0; i<mbs; i++) {
1146: rvals[0] = bs*(baij->rstartbs + i);
1147: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1148: for (j=ai[i]; j<ai[i+1]; j++) {
1149: col = (baij->cstartbs+aj[j])*bs;
1150: for (k=0; k<bs; k++) {
1151: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1152: col++; a += bs;
1153: }
1154: }
1155: }
1156: /* copy over the B part */
1157: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1158: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1159: for (i=0; i<mbs; i++) {
1160: rvals[0] = bs*(baij->rstartbs + i);
1161: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1162: for (j=ai[i]; j<ai[i+1]; j++) {
1163: col = baij->garray[aj[j]]*bs;
1164: for (k=0; k<bs; k++) {
1165: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1166: col++; a += bs;
1167: }
1168: }
1169: }
1170: PetscFree(rvals);
1171: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1172: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1173: /*
1174: Everyone has to make this call to draw the matrix since the graphics waits are
1175: synchronized across all processors that share the PetscDraw object
1176: */
1177: PetscViewerGetSingleton(viewer,&sviewer);
1178: if (!rank) {
1179: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);
1180: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1181: }
1182: PetscViewerRestoreSingleton(viewer,&sviewer);
1183: MatDestroy(A);
1184: }
1185: return(0);
1186: }
1190: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1191: {
1193: PetscTruth iascii,isdraw,issocket,isbinary;
1196: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1197: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1198: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1199: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1200: if (iascii || isdraw || issocket || isbinary) {
1201: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1202: } else {
1203: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1204: }
1205: return(0);
1206: }
1210: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1211: {
1212: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1216: #if defined(PETSC_USE_LOG)
1217: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1218: #endif
1219: MatStashDestroy_Private(&mat->stash);
1220: MatStashDestroy_Private(&mat->bstash);
1221: MatDestroy(baij->A);
1222: MatDestroy(baij->B);
1223: #if defined (PETSC_USE_CTABLE)
1224: if (baij->colmap) {PetscTableDelete(baij->colmap);}
1225: #else
1226: PetscFree(baij->colmap);
1227: #endif
1228: PetscFree(baij->garray);
1229: if (baij->lvec) {VecDestroy(baij->lvec);}
1230: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
1231: PetscFree(baij->rowvalues);
1232: PetscFree(baij->barray);
1233: PetscFree(baij->hd);
1234: #if defined(PETSC_USE_MAT_SINGLE)
1235: PetscFree(baij->setvaluescopy);
1236: #endif
1237: PetscFree(baij->rangebs);
1238: PetscFree(baij);
1240: PetscObjectChangeTypeName((PetscObject)mat,0);
1241: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1242: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1243: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1244: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);
1245: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);
1246: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1247: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);
1248: return(0);
1249: }
1253: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1254: {
1255: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1257: PetscInt nt;
1260: VecGetLocalSize(xx,&nt);
1261: if (nt != A->cmap.n) {
1262: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1263: }
1264: VecGetLocalSize(yy,&nt);
1265: if (nt != A->rmap.n) {
1266: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1267: }
1268: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1269: (*a->A->ops->mult)(a->A,xx,yy);
1270: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1271: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1272: return(0);
1273: }
1277: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1278: {
1279: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1283: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1284: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1285: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1286: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1287: return(0);
1288: }
1292: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1293: {
1294: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1296: PetscTruth merged;
1299: VecScatterGetMerged(a->Mvctx,&merged);
1300: /* do nondiagonal part */
1301: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1302: if (!merged) {
1303: /* send it on its way */
1304: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1305: /* do local part */
1306: (*a->A->ops->multtranspose)(a->A,xx,yy);
1307: /* receive remote parts: note this assumes the values are not actually */
1308: /* inserted in yy until the next line */
1309: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1310: } else {
1311: /* do local part */
1312: (*a->A->ops->multtranspose)(a->A,xx,yy);
1313: /* send it on its way */
1314: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1315: /* values actually were received in the Begin() but we need to call this nop */
1316: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1317: }
1318: return(0);
1319: }
1323: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1324: {
1325: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1329: /* do nondiagonal part */
1330: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1331: /* send it on its way */
1332: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1333: /* do local part */
1334: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1335: /* receive remote parts: note this assumes the values are not actually */
1336: /* inserted in zz until the next line, which is true for this implementation */
1337: /* but perhaps is not always true. */
1338: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1339: return(0);
1340: }
1342: /*
1343: This only works correctly for square matrices where the subblock A->A is the
1344: diagonal block
1345: */
1348: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1349: {
1350: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1354: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1355: MatGetDiagonal(a->A,v);
1356: return(0);
1357: }
1361: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1362: {
1363: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1367: MatScale(a->A,aa);
1368: MatScale(a->B,aa);
1369: return(0);
1370: }
1374: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1375: {
1376: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1377: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1379: PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1380: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1381: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1384: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1385: mat->getrowactive = PETSC_TRUE;
1387: if (!mat->rowvalues && (idx || v)) {
1388: /*
1389: allocate enough space to hold information from the longest row.
1390: */
1391: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1392: PetscInt max = 1,mbs = mat->mbs,tmp;
1393: for (i=0; i<mbs; i++) {
1394: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1395: if (max < tmp) { max = tmp; }
1396: }
1397: PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1398: mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1399: }
1400:
1401: if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1402: lrow = row - brstart;
1404: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1405: if (!v) {pvA = 0; pvB = 0;}
1406: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1407: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1408: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1409: nztot = nzA + nzB;
1411: cmap = mat->garray;
1412: if (v || idx) {
1413: if (nztot) {
1414: /* Sort by increasing column numbers, assuming A and B already sorted */
1415: PetscInt imark = -1;
1416: if (v) {
1417: *v = v_p = mat->rowvalues;
1418: for (i=0; i<nzB; i++) {
1419: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1420: else break;
1421: }
1422: imark = i;
1423: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1424: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1425: }
1426: if (idx) {
1427: *idx = idx_p = mat->rowindices;
1428: if (imark > -1) {
1429: for (i=0; i<imark; i++) {
1430: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1431: }
1432: } else {
1433: for (i=0; i<nzB; i++) {
1434: if (cmap[cworkB[i]/bs] < cstart)
1435: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1436: else break;
1437: }
1438: imark = i;
1439: }
1440: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1441: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1442: }
1443: } else {
1444: if (idx) *idx = 0;
1445: if (v) *v = 0;
1446: }
1447: }
1448: *nz = nztot;
1449: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1450: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1451: return(0);
1452: }
1456: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1457: {
1458: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1461: if (!baij->getrowactive) {
1462: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1463: }
1464: baij->getrowactive = PETSC_FALSE;
1465: return(0);
1466: }
1470: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1471: {
1472: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1476: MatZeroEntries(l->A);
1477: MatZeroEntries(l->B);
1478: return(0);
1479: }
1483: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1484: {
1485: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1486: Mat A = a->A,B = a->B;
1488: PetscReal isend[5],irecv[5];
1491: info->block_size = (PetscReal)matin->rmap.bs;
1492: MatGetInfo(A,MAT_LOCAL,info);
1493: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1494: isend[3] = info->memory; isend[4] = info->mallocs;
1495: MatGetInfo(B,MAT_LOCAL,info);
1496: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1497: isend[3] += info->memory; isend[4] += info->mallocs;
1498: if (flag == MAT_LOCAL) {
1499: info->nz_used = isend[0];
1500: info->nz_allocated = isend[1];
1501: info->nz_unneeded = isend[2];
1502: info->memory = isend[3];
1503: info->mallocs = isend[4];
1504: } else if (flag == MAT_GLOBAL_MAX) {
1505: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1506: info->nz_used = irecv[0];
1507: info->nz_allocated = irecv[1];
1508: info->nz_unneeded = irecv[2];
1509: info->memory = irecv[3];
1510: info->mallocs = irecv[4];
1511: } else if (flag == MAT_GLOBAL_SUM) {
1512: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1513: info->nz_used = irecv[0];
1514: info->nz_allocated = irecv[1];
1515: info->nz_unneeded = irecv[2];
1516: info->memory = irecv[3];
1517: info->mallocs = irecv[4];
1518: } else {
1519: SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520: }
1521: info->rows_global = (PetscReal)A->rmap.N;
1522: info->columns_global = (PetscReal)A->cmap.N;
1523: info->rows_local = (PetscReal)A->rmap.N;
1524: info->columns_local = (PetscReal)A->cmap.N;
1525: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1526: info->fill_ratio_needed = 0;
1527: info->factor_mallocs = 0;
1528: return(0);
1529: }
1533: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1534: {
1535: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1539: switch (op) {
1540: case MAT_NO_NEW_NONZERO_LOCATIONS:
1541: case MAT_YES_NEW_NONZERO_LOCATIONS:
1542: case MAT_COLUMNS_UNSORTED:
1543: case MAT_COLUMNS_SORTED:
1544: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1545: case MAT_KEEP_ZEROED_ROWS:
1546: case MAT_NEW_NONZERO_LOCATION_ERR:
1547: MatSetOption(a->A,op);
1548: MatSetOption(a->B,op);
1549: break;
1550: case MAT_ROW_ORIENTED:
1551: a->roworiented = PETSC_TRUE;
1552: MatSetOption(a->A,op);
1553: MatSetOption(a->B,op);
1554: break;
1555: case MAT_ROWS_SORTED:
1556: case MAT_ROWS_UNSORTED:
1557: case MAT_YES_NEW_DIAGONALS:
1558: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1559: break;
1560: case MAT_COLUMN_ORIENTED:
1561: a->roworiented = PETSC_FALSE;
1562: MatSetOption(a->A,op);
1563: MatSetOption(a->B,op);
1564: break;
1565: case MAT_IGNORE_OFF_PROC_ENTRIES:
1566: a->donotstash = PETSC_TRUE;
1567: break;
1568: case MAT_NO_NEW_DIAGONALS:
1569: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1570: case MAT_USE_HASH_TABLE:
1571: a->ht_flag = PETSC_TRUE;
1572: break;
1573: case MAT_SYMMETRIC:
1574: case MAT_STRUCTURALLY_SYMMETRIC:
1575: case MAT_HERMITIAN:
1576: case MAT_SYMMETRY_ETERNAL:
1577: MatSetOption(a->A,op);
1578: break;
1579: case MAT_NOT_SYMMETRIC:
1580: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1581: case MAT_NOT_HERMITIAN:
1582: case MAT_NOT_SYMMETRY_ETERNAL:
1583: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1584: break;
1585: default:
1586: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1587: }
1588: return(0);
1589: }
1593: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1594: {
1595: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1596: Mat_SeqBAIJ *Aloc;
1597: Mat B;
1599: PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1600: PetscInt bs=A->rmap.bs,mbs=baij->mbs;
1601: MatScalar *a;
1602:
1604: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1605: MatCreate(A->comm,&B);
1606: MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);
1607: MatSetType(B,A->type_name);
1608: MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);
1609:
1610: /* copy over the A part */
1611: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1612: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1613: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1614:
1615: for (i=0; i<mbs; i++) {
1616: rvals[0] = bs*(baij->rstartbs + i);
1617: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1618: for (j=ai[i]; j<ai[i+1]; j++) {
1619: col = (baij->cstartbs+aj[j])*bs;
1620: for (k=0; k<bs; k++) {
1621: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1622: col++; a += bs;
1623: }
1624: }
1625: }
1626: /* copy over the B part */
1627: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1628: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1629: for (i=0; i<mbs; i++) {
1630: rvals[0] = bs*(baij->rstartbs + i);
1631: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1632: for (j=ai[i]; j<ai[i+1]; j++) {
1633: col = baij->garray[aj[j]]*bs;
1634: for (k=0; k<bs; k++) {
1635: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1636: col++; a += bs;
1637: }
1638: }
1639: }
1640: PetscFree(rvals);
1641: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1642: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1643:
1644: if (matout) {
1645: *matout = B;
1646: } else {
1647: MatHeaderCopy(A,B);
1648: }
1649: return(0);
1650: }
1654: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1655: {
1656: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1657: Mat a = baij->A,b = baij->B;
1659: PetscInt s1,s2,s3;
1662: MatGetLocalSize(mat,&s2,&s3);
1663: if (rr) {
1664: VecGetLocalSize(rr,&s1);
1665: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666: /* Overlap communication with computation. */
1667: VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1668: }
1669: if (ll) {
1670: VecGetLocalSize(ll,&s1);
1671: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1673: }
1674: /* scale the diagonal block */
1675: (*a->ops->diagonalscale)(a,ll,rr);
1677: if (rr) {
1678: /* Do a scatter end and then right scale the off-diagonal block */
1679: VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1680: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1681: }
1682:
1683: return(0);
1684: }
1688: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1689: {
1690: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1692: PetscMPIInt imdex,size = l->size,n,rank = l->rank;
1693: PetscInt i,*owners = A->rmap.range;
1694: PetscInt *nprocs,j,idx,nsends,row;
1695: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
1696: PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1697: PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1698: MPI_Comm comm = A->comm;
1699: MPI_Request *send_waits,*recv_waits;
1700: MPI_Status recv_status,*send_status;
1701: #if defined(PETSC_DEBUG)
1702: PetscTruth found = PETSC_FALSE;
1703: #endif
1704:
1706: /* first count number of contributors to each processor */
1707: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
1708: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
1709: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
1710: j = 0;
1711: for (i=0; i<N; i++) {
1712: if (lastidx > (idx = rows[i])) j = 0;
1713: lastidx = idx;
1714: for (; j<size; j++) {
1715: if (idx >= owners[j] && idx < owners[j+1]) {
1716: nprocs[2*j]++;
1717: nprocs[2*j+1] = 1;
1718: owner[i] = j;
1719: #if defined(PETSC_DEBUG)
1720: found = PETSC_TRUE;
1721: #endif
1722: break;
1723: }
1724: }
1725: #if defined(PETSC_DEBUG)
1726: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1727: found = PETSC_FALSE;
1728: #endif
1729: }
1730: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1731:
1732: /* inform other processors of number of messages and max length*/
1733: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1734:
1735: /* post receives: */
1736: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
1737: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1738: for (i=0; i<nrecvs; i++) {
1739: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1740: }
1741:
1742: /* do sends:
1743: 1) starts[i] gives the starting index in svalues for stuff going to
1744: the ith processor
1745: */
1746: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
1747: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1748: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
1749: starts[0] = 0;
1750: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1751: for (i=0; i<N; i++) {
1752: svalues[starts[owner[i]]++] = rows[i];
1753: }
1754:
1755: starts[0] = 0;
1756: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1757: count = 0;
1758: for (i=0; i<size; i++) {
1759: if (nprocs[2*i+1]) {
1760: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
1761: }
1762: }
1763: PetscFree(starts);
1765: base = owners[rank];
1766:
1767: /* wait on receives */
1768: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
1769: source = lens + nrecvs;
1770: count = nrecvs; slen = 0;
1771: while (count) {
1772: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1773: /* unpack receives into our local space */
1774: MPI_Get_count(&recv_status,MPIU_INT,&n);
1775: source[imdex] = recv_status.MPI_SOURCE;
1776: lens[imdex] = n;
1777: slen += n;
1778: count--;
1779: }
1780: PetscFree(recv_waits);
1781:
1782: /* copy the received row indices into the local list */
1783: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1784: count = 0;
1785: for (i=0; i<nrecvs; i++) {
1786: values = rvalues + i*nmax;
1787: for (j=0; j<lens[i]; j++) {
1788: lrows[count++] = values[j] - base;
1789: }
1790: }
1791: PetscFree(rvalues);
1792: PetscFree(lens);
1793: PetscFree(owner);
1794: PetscFree(nprocs);
1795:
1796: /* actually zap the local rows */
1797: /*
1798: Zero the required rows. If the "diagonal block" of the matrix
1799: is square and the user wishes to set the diagonal we use separate
1800: code so that MatSetValues() is not called for each diagonal entry, which
1801: would allocate new memory each time, causing many mallocs and slowing things down.
1803: Contributed by: Matthew Knepley
1804: */
1805: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1806: MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);
1807: if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1808: MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);
1809: } else if (diag != 0.0) {
1810: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1811: if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1812: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1813: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1814: }
1815: for (i=0; i<slen; i++) {
1816: row = lrows[i] + rstart_bs;
1817: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1818: }
1819: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1820: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1821: } else {
1822: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1823: }
1825: PetscFree(lrows);
1827: /* wait on sends */
1828: if (nsends) {
1829: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1830: MPI_Waitall(nsends,send_waits,send_status);
1831: PetscFree(send_status);
1832: }
1833: PetscFree(send_waits);
1834: PetscFree(svalues);
1836: return(0);
1837: }
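/*
   A minimal usage sketch for the routine above (a hypothetical fragment, not part
   of this file), assuming the MatZeroRows() calling sequence of this PETSc
   version: each process passes the global indices of rows it wants zeroed, and
   the scatter above routes each index to the process that owns it.

     Mat      A;                             // assembled MATMPIBAIJ matrix
     PetscInt rstart,rend,rows[2];

     MatGetOwnershipRange(A,&rstart,&rend);
     rows[0] = rstart; rows[1] = rstart+1;   // two locally owned global rows
     MatZeroRows(A,2,rows,1.0);              // zero the rows, put 1.0 on their diagonal
*/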
1841: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1842: {
1843: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1847: MatSetUnfactored(a->A);
1848: return(0);
1849: }
1851: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1855: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1856: {
1857: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1858: Mat a,b,c,d;
1859: PetscTruth flg;
1863: a = matA->A; b = matA->B;
1864: c = matB->A; d = matB->B;
1866: MatEqual(a,c,&flg);
1867: if (flg) {
1868: MatEqual(b,d,&flg);
1869: }
1870: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1871: return(0);
1872: }
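/*
   Usage sketch (hypothetical fragment): the comparison above is collective; the
   local results for the diagonal and off-diagonal blocks are combined with a
   logical AND over the communicator, so every process receives the same answer.

     Mat        A,B;        // two assembled MATMPIBAIJ matrices
     PetscTruth flg;
     MatEqual(A,B,&flg);
*/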
1876: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1877: {
1879: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1880: Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1883: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1884: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1885: MatCopy_Basic(A,B,str);
1886: } else {
1887: MatCopy(a->A,b->A,str);
1888: MatCopy(a->B,b->B,str);
1889: }
1890: return(0);
1891: }
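/*
   Usage sketch (hypothetical fragment): the block-wise fast path above is taken
   only when both matrices share the copy implementation and the caller promises
   SAME_NONZERO_PATTERN; anything else falls back to MatCopy_Basic().

     Mat A,B;
     MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);   // B has the same nonzero pattern as A
     MatCopy(A,B,SAME_NONZERO_PATTERN);           // copies a->A and a->B block by block
*/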
1895: PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1896: {
1900: MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1901: return(0);
1902: }
1904: #include petscblaslapack.h
1907: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1908: {
1910: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1911: PetscBLASInt bnz,one=1;
1912: Mat_SeqBAIJ *x,*y;
1915: if (str == SAME_NONZERO_PATTERN) {
1916: PetscScalar alpha = a;
1917: x = (Mat_SeqBAIJ *)xx->A->data;
1918: y = (Mat_SeqBAIJ *)yy->A->data;
1919: bnz = (PetscBLASInt)x->nz;
1920: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1921: x = (Mat_SeqBAIJ *)xx->B->data;
1922: y = (Mat_SeqBAIJ *)yy->B->data;
1923: bnz = (PetscBLASInt)x->nz;
1924: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1925: } else {
1926: MatAXPY_Basic(Y,a,X,str);
1927: }
1928: return(0);
1929: }
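/*
   Usage sketch (hypothetical fragment): with SAME_NONZERO_PATTERN the update
   reduces to the two BLAS axpy calls above on the stored values of the diagonal
   and off-diagonal parts; any other MatStructure uses MatAXPY_Basic().

     Mat         X,Y;                       // same block size and nonzero pattern
     PetscScalar a = 2.0;
     MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);   // Y <- Y + 2.0*X
*/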
1933: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1934: {
1935: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1939: MatRealPart(a->A);
1940: MatRealPart(a->B);
1941: return(0);
1942: }
1946: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1947: {
1948: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1952: MatImaginaryPart(a->A);
1953: MatImaginaryPart(a->B);
1954: return(0);
1955: }
1957: /* -------------------------------------------------------------------*/
1958: static struct _MatOps MatOps_Values = {
1959: MatSetValues_MPIBAIJ,
1960: MatGetRow_MPIBAIJ,
1961: MatRestoreRow_MPIBAIJ,
1962: MatMult_MPIBAIJ,
1963: /* 4*/ MatMultAdd_MPIBAIJ,
1964: MatMultTranspose_MPIBAIJ,
1965: MatMultTransposeAdd_MPIBAIJ,
1966: 0,
1967: 0,
1968: 0,
1969: /*10*/ 0,
1970: 0,
1971: 0,
1972: 0,
1973: MatTranspose_MPIBAIJ,
1974: /*15*/ MatGetInfo_MPIBAIJ,
1975: MatEqual_MPIBAIJ,
1976: MatGetDiagonal_MPIBAIJ,
1977: MatDiagonalScale_MPIBAIJ,
1978: MatNorm_MPIBAIJ,
1979: /*20*/ MatAssemblyBegin_MPIBAIJ,
1980: MatAssemblyEnd_MPIBAIJ,
1981: 0,
1982: MatSetOption_MPIBAIJ,
1983: MatZeroEntries_MPIBAIJ,
1984: /*25*/ MatZeroRows_MPIBAIJ,
1985: 0,
1986: 0,
1987: 0,
1988: 0,
1989: /*30*/ MatSetUpPreallocation_MPIBAIJ,
1990: 0,
1991: 0,
1992: 0,
1993: 0,
1994: /*35*/ MatDuplicate_MPIBAIJ,
1995: 0,
1996: 0,
1997: 0,
1998: 0,
1999: /*40*/ MatAXPY_MPIBAIJ,
2000: MatGetSubMatrices_MPIBAIJ,
2001: MatIncreaseOverlap_MPIBAIJ,
2002: MatGetValues_MPIBAIJ,
2003: MatCopy_MPIBAIJ,
2004: /*45*/ 0,
2005: MatScale_MPIBAIJ,
2006: 0,
2007: 0,
2008: 0,
2009: /*50*/ 0,
2010: 0,
2011: 0,
2012: 0,
2013: 0,
2014: /*55*/ 0,
2015: 0,
2016: MatSetUnfactored_MPIBAIJ,
2017: 0,
2018: MatSetValuesBlocked_MPIBAIJ,
2019: /*60*/ 0,
2020: MatDestroy_MPIBAIJ,
2021: MatView_MPIBAIJ,
2022: 0,
2023: 0,
2024: /*65*/ 0,
2025: 0,
2026: 0,
2027: 0,
2028: 0,
2029: /*70*/ MatGetRowMax_MPIBAIJ,
2030: 0,
2031: 0,
2032: 0,
2033: 0,
2034: /*75*/ 0,
2035: 0,
2036: 0,
2037: 0,
2038: 0,
2039: /*80*/ 0,
2040: 0,
2041: 0,
2042: 0,
2043: MatLoad_MPIBAIJ,
2044: /*85*/ 0,
2045: 0,
2046: 0,
2047: 0,
2048: 0,
2049: /*90*/ 0,
2050: 0,
2051: 0,
2052: 0,
2053: 0,
2054: /*95*/ 0,
2055: 0,
2056: 0,
2057: 0,
2058: 0,
2059: /*100*/0,
2060: 0,
2061: 0,
2062: 0,
2063: 0,
2064: /*105*/0,
2065: MatRealPart_MPIBAIJ,
2066: MatImaginaryPart_MPIBAIJ};
2072: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2073: {
2075: *a = ((Mat_MPIBAIJ *)A->data)->A;
2076: *iscopy = PETSC_FALSE;
2077: return(0);
2078: }
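/*
   Usage sketch (hypothetical fragment), assuming the MatGetDiagonalBlock()
   calling sequence registered for this version: the routine above hands back the
   per-process square SEQBAIJ submatrix by reference, so iscopy is PETSC_FALSE.

     Mat        A,Ad;
     PetscTruth iscopy;
     MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);
*/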
2087: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2088: {
2089: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data;
2090: PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2091: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2092: const PetscInt *JJ;
2093: PetscScalar *values;
2097: #if defined(PETSC_OPT_g)
2098: if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2099: #endif
2100: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2101: o_nnz = d_nnz + m;
2103: for (i=0; i<m; i++) {
2104: nnz = Ii[i+1]- Ii[i];
2105: JJ = J + Ii[i];
2106: nnz_max = PetscMax(nnz_max,nnz);
2107: #if defined(PETSC_OPT_g)
2108: if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2109: #endif
2110: for (j=0; j<nnz; j++) {
2111: if (*JJ >= cstart) break;
2112: JJ++;
2113: }
2114: d = 0;
2115: for (; j<nnz; j++) {
2116: if (*JJ++ >= cend) break;
2117: d++;
2118: }
2119: d_nnz[i] = d;
2120: o_nnz[i] = nnz - d;
2121: }
2122: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2123: PetscFree(d_nnz);
2125: if (v) values = (PetscScalar*)v;
2126: else {
2127: PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);
2128: PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));
2129: }
2131: MatSetOption(B,MAT_COLUMNS_SORTED);
2132: for (i=0; i<m; i++) {
2133: ii = i + rstart;
2134: nnz = Ii[i+1]- Ii[i];
2135: MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
2136: }
2137: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2138: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2139: MatSetOption(B,MAT_COLUMNS_UNSORTED);
2141: if (!v) {
2142: PetscFree(values);
2143: }
2144: return(0);
2145: }
2149: /*@C
2150: MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2151: (block compressed row).
2153: Collective on MPI_Comm
2155: Input Parameters:
2156: + A - the matrix
2157: . bs - the block size
2158: . i - the indices into j for the start of each local block row (starts with zero)
2159: . j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2160: - v - optional values in the matrix
2161: Level: developer
2163: .keywords: matrix, block, aij, compressed row, sparse, parallel
2165: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2166: @*/
2167: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2168: {
2169: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2172: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);
2173: if (f) {
2174: (*f)(B,bs,i,j,v);
2175: }
2176: return(0);
2177: }
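/*
   A hypothetical fragment driving the CSR-style preallocation above. Here i and
   j describe local block rows: i has one entry per local block row plus one, and
   j holds sorted global block column indices. Passing PETSC_NULL for v makes the
   routine insert zero blocks, which can later be overwritten with
   MatSetValuesBlocked().

     Mat      B;
     PetscInt bs   = 2;
     PetscInt i[3] = {0,2,3};     // 2 local block rows with 2 and 1 block nonzeros
     PetscInt j[3] = {0,1,1};     // sorted block column indices for each block row

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIBAIJ);
     MatMPIBAIJSetPreallocationCSR(B,bs,i,j,PETSC_NULL);
*/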
2182: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2183: {
2184: Mat_MPIBAIJ *b;
2186: PetscInt i;
2189: B->preallocated = PETSC_TRUE;
2190: PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");
2191: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);
2192: PetscOptionsEnd();
2194: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2195: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2196: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2197: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2198: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2199:
2200: B->rmap.bs = bs;
2201: B->cmap.bs = bs;
2202: PetscMapInitialize(B->comm,&B->rmap);
2203: PetscMapInitialize(B->comm,&B->cmap);
2205: if (d_nnz) {
2206: for (i=0; i<B->rmap.n/bs; i++) {
2207: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2208: }
2209: }
2210: if (o_nnz) {
2211: for (i=0; i<B->rmap.n/bs; i++) {
2212: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2213: }
2214: }
2216: b = (Mat_MPIBAIJ*)B->data;
2217: b->bs2 = bs*bs;
2218: b->mbs = B->rmap.n/bs;
2219: b->nbs = B->cmap.n/bs;
2220: b->Mbs = B->rmap.N/bs;
2221: b->Nbs = B->cmap.N/bs;
2223: for (i=0; i<=b->size; i++) {
2224: b->rangebs[i] = B->rmap.range[i]/bs;
2225: }
2226: b->rstartbs = B->rmap.rstart/bs;
2227: b->rendbs = B->rmap.rend/bs;
2228: b->cstartbs = B->cmap.rstart/bs;
2229: b->cendbs = B->cmap.rend/bs;
2231: MatCreate(PETSC_COMM_SELF,&b->A);
2232: MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);
2233: MatSetType(b->A,MATSEQBAIJ);
2234: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2235: PetscLogObjectParent(B,b->A);
2236: MatCreate(PETSC_COMM_SELF,&b->B);
2237: MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
2238: MatSetType(b->B,MATSEQBAIJ);
2239: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2240: PetscLogObjectParent(B,b->B);
2242: MatStashCreate_Private(B->comm,bs,&B->bstash);
2244: return(0);
2245: }
2249: EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2250: EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2253: /*MC
2254: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2256: Options Database Keys:
2257: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2258: . -mat_block_size <bs> - set the blocksize used to store the matrix
2259: - -mat_use_hash_table <fact> - use a hash table (with size factor fact) to save memory when constructing the matrix
2261: Level: beginner
2263: .seealso: MatCreateMPIBAIJ
2264: M*/
2269: PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2270: {
2271: Mat_MPIBAIJ *b;
2273: PetscTruth flg;
2276: PetscNew(Mat_MPIBAIJ,&b);
2277: B->data = (void*)b;
2280: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2281: B->mapping = 0;
2282: B->factor = 0;
2283: B->assembled = PETSC_FALSE;
2285: B->insertmode = NOT_SET_VALUES;
2286: MPI_Comm_rank(B->comm,&b->rank);
2287: MPI_Comm_size(B->comm,&b->size);
2289: /* build local table of row and column ownerships */
2290: PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);
2292: /* build cache for off array entries formed */
2293: MatStashCreate_Private(B->comm,1,&B->stash);
2294: b->donotstash = PETSC_FALSE;
2295: b->colmap = PETSC_NULL;
2296: b->garray = PETSC_NULL;
2297: b->roworiented = PETSC_TRUE;
2299: #if defined(PETSC_USE_MAT_SINGLE)
2300: /* stuff for MatSetValues_XXX in single precision */
2301: b->setvalueslen = 0;
2302: b->setvaluescopy = PETSC_NULL;
2303: #endif
2305: /* stuff used in block assembly */
2306: b->barray = 0;
2308: /* stuff used for matrix vector multiply */
2309: b->lvec = 0;
2310: b->Mvctx = 0;
2312: /* stuff for MatGetRow() */
2313: b->rowindices = 0;
2314: b->rowvalues = 0;
2315: b->getrowactive = PETSC_FALSE;
2317: /* hash table stuff */
2318: b->ht = 0;
2319: b->hd = 0;
2320: b->ht_size = 0;
2321: b->ht_flag = PETSC_FALSE;
2322: b->ht_fact = 0;
2323: b->ht_total_ct = 0;
2324: b->ht_insert_ct = 0;
2326: PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");
2327: PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);
2328: if (flg) {
2329: PetscReal fact = 1.39;
2330: MatSetOption(B,MAT_USE_HASH_TABLE);
2331: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);
2332: if (fact <= 1.0) fact = 1.39;
2333: MatMPIBAIJSetHashTableFactor(B,fact);
2334: PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);
2335: }
2336: PetscOptionsEnd();
2338: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2339: "MatStoreValues_MPIBAIJ",
2340: MatStoreValues_MPIBAIJ);
2341: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2342: "MatRetrieveValues_MPIBAIJ",
2343: MatRetrieveValues_MPIBAIJ);
2344: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2345: "MatGetDiagonalBlock_MPIBAIJ",
2346: MatGetDiagonalBlock_MPIBAIJ);
2347: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2348: "MatMPIBAIJSetPreallocation_MPIBAIJ",
2349: MatMPIBAIJSetPreallocation_MPIBAIJ);
2350: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2351: "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2352: MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
2353: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2354: "MatDiagonalScaleLocal_MPIBAIJ",
2355: MatDiagonalScaleLocal_MPIBAIJ);
2356: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2357: "MatSetHashTableFactor_MPIBAIJ",
2358: MatSetHashTableFactor_MPIBAIJ);
2359: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
2360: return(0);
2361: }
2364: /*MC
2365: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2367: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2368: and MATMPIBAIJ otherwise.
2370: Options Database Keys:
2371: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2373: Level: beginner
2375: .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2376: M*/
2381: PetscErrorCode MatCreate_BAIJ(Mat A)
2382: {
2384: PetscMPIInt size;
2387: MPI_Comm_size(A->comm,&size);
2388: if (size == 1) {
2389: MatSetType(A,MATSEQBAIJ);
2390: } else {
2391: MatSetType(A,MATMPIBAIJ);
2392: }
2393: return(0);
2394: }
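/*
   Usage sketch (hypothetical fragment): requesting the generic type lets the
   constructor above pick MATSEQBAIJ on one process and MATMPIBAIJ otherwise;
   -mat_type baij and -mat_block_size can still override things at runtime.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATBAIJ);
     MatSetFromOptions(A);
*/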
2399: /*@C
2400: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2401: (block compressed row). For good matrix assembly performance
2402: the user should preallocate the matrix storage by setting the parameters
2403: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2404: performance can be increased by more than a factor of 50.
2406: Collective on Mat
2408: Input Parameters:
2409: + A - the matrix
2410: . bs - size of block
2411: . d_nz - number of block nonzeros per block row in diagonal portion of local
2412: submatrix (same for all local rows)
2413: . d_nnz - array containing the number of block nonzeros in the various block rows
2414: in the diagonal portion of the local submatrix (possibly different for each block
2415: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2416: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2417: submatrix (same for all local rows).
2418: - o_nnz - array containing the number of block nonzeros in the various block rows of the
2419: off-diagonal portion of the local submatrix (possibly different for
2420: each block row) or PETSC_NULL.
2422: If the *_nnz parameter is given then the *_nz parameter is ignored
2424: Options Database Keys:
2425: + -mat_block_size - size of the blocks to use
2426: - -mat_use_hash_table <fact> - use a hash table (with size factor fact) to save memory when constructing the matrix
2428: Notes:
2429: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2430: then it must be used on all processors that share the object for that argument.
2432: Storage Information:
2433: For a square global matrix we define each processor's diagonal portion
2434: to be its local rows and the corresponding columns (a square submatrix);
2435: each processor's off-diagonal portion encompasses the remainder of the
2436: local matrix (a rectangular submatrix).
2438: The user can specify preallocated storage for the diagonal part of
2439: the local submatrix with either d_nz or d_nnz (not both). Set
2440: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2441: memory allocation. Likewise, specify preallocated storage for the
2442: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2444: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2445: the figure below we depict these three local rows and all columns (0-11).
2447: .vb
2448: 0 1 2 3 4 5 6 7 8 9 10 11
2449: -------------------
2450: row 3 | o o o d d d o o o o o o
2451: row 4 | o o o d d d o o o o o o
2452: row 5 | o o o d d d o o o o o o
2453: -------------------
2454: .ve
2455:
2456: Thus, any entries in the d locations are stored in the d (diagonal)
2457: submatrix, and any entries in the o locations are stored in the
2458: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2459: stored simply in the MATSEQBAIJ format for compressed row storage.
2461: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2462: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2463: In general, for PDE problems in which most nonzeros are near the diagonal,
2464: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2465: or you will get TERRIBLE performance; see the users' manual chapter on
2466: matrices.
2468: Level: intermediate
2470: .keywords: matrix, block, aij, compressed row, sparse, parallel
2472: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2473: @*/
2474: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2475: {
2476: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2479: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);
2480: if (f) {
2481: (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
2482: }
2483: return(0);
2484: }
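/*
   A hypothetical fragment for the interface documented above, using constant
   per-block-row estimates; d_nnz/o_nnz arrays (in units of block nonzeros per
   block row) could be supplied instead of d_nz/o_nz.

     Mat      A;
     PetscInt bs = 3;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,90,90);              // 30 global block rows
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);  // 3 diagonal + 1 off-diagonal blocks per block row
*/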
2488: /*@C
2489: MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2490: (block compressed row). For good matrix assembly performance
2491: the user should preallocate the matrix storage by setting the parameters
2492: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2493: performance can be increased by more than a factor of 50.
2495: Collective on MPI_Comm
2497: Input Parameters:
2498: + comm - MPI communicator
2499: . bs - size of block
2500: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2501: This value should be the same as the local size used in creating the
2502: y vector for the matrix-vector product y = Ax.
2503: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2504: This value should be the same as the local size used in creating the
2505: x vector for the matrix-vector product y = Ax.
2506: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2507: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2508: . d_nz - number of nonzero blocks per block row in diagonal portion of local
2509: submatrix (same for all local rows)
2510: . d_nnz - array containing the number of nonzero blocks in the various block rows
2511: in the diagonal portion of the local submatrix (possibly different for each block
2512: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2513: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2514: submatrix (same for all local rows).
2515: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2516: off-diagonal portion of the local submatrix (possibly different for
2517: each block row) or PETSC_NULL.
2519: Output Parameter:
2520: . A - the matrix
2522: Options Database Keys:
2523: + -mat_block_size - size of the blocks to use
2524: - -mat_use_hash_table <fact> - use a hash table (with size factor fact) to save memory when constructing the matrix
2526: Notes:
2527: If the *_nnz parameter is given then the *_nz parameter is ignored
2529: A nonzero block is any block that has 1 or more nonzeros in it
2531: The user MUST specify either the local or global matrix dimensions
2532: (possibly both).
2534: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2535: then it must be used on all processors that share the object for that argument.
2537: Storage Information:
2538: For a square global matrix we define each processor's diagonal portion
2539: to be its local rows and the corresponding columns (a square submatrix);
2540: each processor's off-diagonal portion encompasses the remainder of the
2541: local matrix (a rectangular submatrix).
2543: The user can specify preallocated storage for the diagonal part of
2544: the local submatrix with either d_nz or d_nnz (not both). Set
2545: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2546: memory allocation. Likewise, specify preallocated storage for the
2547: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2549: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2550: the figure below we depict these three local rows and all columns (0-11).
2552: .vb
2553: 0 1 2 3 4 5 6 7 8 9 10 11
2554: -------------------
2555: row 3 | o o o d d d o o o o o o
2556: row 4 | o o o d d d o o o o o o
2557: row 5 | o o o d d d o o o o o o
2558: -------------------
2559: .ve
2560:
2561: Thus, any entries in the d locations are stored in the d (diagonal)
2562: submatrix, and any entries in the o locations are stored in the
2563: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2564: stored simply in the MATSEQBAIJ format for compressed row storage.
2566: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2567: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2568: In general, for PDE problems in which most nonzeros are near the diagonal,
2569: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2570: or you will get TERRIBLE performance; see the users' manual chapter on
2571: matrices.
2573: Level: intermediate
2575: .keywords: matrix, block, aij, compressed row, sparse, parallel
2577: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2578: @*/
2579: PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2580: {
2582: PetscMPIInt size;
2585: MatCreate(comm,A);
2586: MatSetSizes(*A,m,n,M,N);
2587: MPI_Comm_size(comm,&size);
2588: if (size > 1) {
2589: MatSetType(*A,MATMPIBAIJ);
2590: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2591: } else {
2592: MatSetType(*A,MATSEQBAIJ);
2593: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2594: }
2595: return(0);
2596: }
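/*
   A hypothetical fragment for the convenience constructor above, which bundles
   MatCreate(), MatSetSizes(), MatSetType() and the preallocation call; values
   are then inserted a block at a time and the matrix assembled.

     Mat         A;
     PetscInt    bs = 2,m = 10,rstart,rend,row;
     PetscScalar vals[4] = {1.0,0.0,0.0,1.0};        // one 2x2 identity block

     MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                      5,PETSC_NULL,2,PETSC_NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     row = rstart/bs;                                // first locally owned block row
     MatSetValuesBlocked(A,1,&row,1,&row,vals,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/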
2600: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2601: {
2602: Mat mat;
2603: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2605: PetscInt len=0;
2608: *newmat = 0;
2609: MatCreate(matin->comm,&mat);
2610: MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);
2611: MatSetType(mat,matin->type_name);
2612: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2614: mat->factor = matin->factor;
2615: mat->preallocated = PETSC_TRUE;
2616: mat->assembled = PETSC_TRUE;
2617: mat->insertmode = NOT_SET_VALUES;
2619: a = (Mat_MPIBAIJ*)mat->data;
2620: mat->rmap.bs = matin->rmap.bs;
2621: a->bs2 = oldmat->bs2;
2622: a->mbs = oldmat->mbs;
2623: a->nbs = oldmat->nbs;
2624: a->Mbs = oldmat->Mbs;
2625: a->Nbs = oldmat->Nbs;
2626:
2627: PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);
2628: PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);
2630: a->size = oldmat->size;
2631: a->rank = oldmat->rank;
2632: a->donotstash = oldmat->donotstash;
2633: a->roworiented = oldmat->roworiented;
2634: a->rowindices = 0;
2635: a->rowvalues = 0;
2636: a->getrowactive = PETSC_FALSE;
2637: a->barray = 0;
2638: a->rstartbs = oldmat->rstartbs;
2639: a->rendbs = oldmat->rendbs;
2640: a->cstartbs = oldmat->cstartbs;
2641: a->cendbs = oldmat->cendbs;
2643: /* hash table stuff */
2644: a->ht = 0;
2645: a->hd = 0;
2646: a->ht_size = 0;
2647: a->ht_flag = oldmat->ht_flag;
2648: a->ht_fact = oldmat->ht_fact;
2649: a->ht_total_ct = 0;
2650: a->ht_insert_ct = 0;
2652: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
2653: MatStashCreate_Private(matin->comm,1,&mat->stash);
2654: MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);
2655: if (oldmat->colmap) {
2656: #if defined (PETSC_USE_CTABLE)
2657: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2658: #else
2659: PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);
2660: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2661: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
2662: #endif
2663: } else a->colmap = 0;
2665: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2666: PetscMalloc(len*sizeof(PetscInt),&a->garray);
2667: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2668: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
2669: } else a->garray = 0;
2670:
2671: VecDuplicate(oldmat->lvec,&a->lvec);
2672: PetscLogObjectParent(mat,a->lvec);
2673: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2674: PetscLogObjectParent(mat,a->Mvctx);
2676: MatDuplicate(oldmat->A,cpvalues,&a->A);
2677: PetscLogObjectParent(mat,a->A);
2678: MatDuplicate(oldmat->B,cpvalues,&a->B);
2679: PetscLogObjectParent(mat,a->B);
2680: PetscFListDuplicate(matin->qlist,&mat->qlist);
2681: *newmat = mat;
2683: return(0);
2684: }
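/*
   Usage sketch (hypothetical fragment): duplication copies the layout, block
   size, communication context and nonzero structure; MAT_COPY_VALUES also
   copies the numerical entries.

     Mat A,B;                               // A is an assembled MATMPIBAIJ matrix
     MatDuplicate(A,MAT_COPY_VALUES,&B);
     MatDestroy(B);
*/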
2686: #include petscsys.h
2690: PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2691: {
2692: Mat A;
2694: int fd;
2695: PetscInt i,nz,j,rstart,rend;
2696: PetscScalar *vals,*buf;
2697: MPI_Comm comm = ((PetscObject)viewer)->comm;
2698: MPI_Status status;
2699: PetscMPIInt rank,size,maxnz;
2700: PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2701: PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2702: PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2703: PetscMPIInt tag = ((PetscObject)viewer)->tag;
2704: PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2705: PetscInt dcount,kmax,k,nzcount,tmp,mend;
2708: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");
2709: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2710: PetscOptionsEnd();
2712: MPI_Comm_size(comm,&size);
2713: MPI_Comm_rank(comm,&rank);
2714: if (!rank) {
2715: PetscViewerBinaryGetDescriptor(viewer,&fd);
2716: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2717: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2718: }
2720: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2721: M = header[1]; N = header[2];
2723: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2725: /*
2726: This code adds extra rows to make sure the number of rows is
2727: divisible by the blocksize
2728: */
2729: Mbs = M/bs;
2730: extra_rows = bs - M + bs*Mbs;
2731: if (extra_rows == bs) extra_rows = 0;
2732: else Mbs++;
2733: if (extra_rows && !rank) {
2734: PetscInfo(0,"Padding loaded matrix to match blocksize\n");
2735: }
2737: /* determine ownership of all rows */
2738: mbs = Mbs/size + ((Mbs % size) > rank);
2739: m = mbs*bs;
2740: PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);
2741: MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2743: /* process 0 needs enough room for the process with the most rows */
2744: if (!rank) {
2745: mmax = rowners[1];
2746: for (i=2; i<size; i++) {
2747: mmax = PetscMax(mmax,rowners[i]);
2748: }
2749: mmax*=bs;
2750: } else mmax = m;
2752: rowners[0] = 0;
2753: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2754: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2755: rstart = rowners[rank];
2756: rend = rowners[rank+1];
2758: /* distribute row lengths to all processors */
2759: PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);
2760: if (!rank) {
2761: mend = m;
2762: if (size == 1) mend = mend - extra_rows;
2763: PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
2764: for (j=mend; j<m; j++) locrowlens[j] = 1;
2765: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2766: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2767: PetscMemzero(procsnz,size*sizeof(PetscInt));
2768: for (j=0; j<m; j++) {
2769: procsnz[0] += locrowlens[j];
2770: }
2771: for (i=1; i<size; i++) {
2772: mend = browners[i+1] - browners[i];
2773: if (i == size-1) mend = mend - extra_rows;
2774: PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
2775: for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2776: /* calculate the number of nonzeros on each processor */
2777: for (j=0; j<browners[i+1]-browners[i]; j++) {
2778: procsnz[i] += rowlengths[j];
2779: }
2780: MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
2781: }
2782: PetscFree(rowlengths);
2783: } else {
2784: MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
2785: }
2787: if (!rank) {
2788: /* determine max buffer needed and allocate it */
2789: maxnz = procsnz[0];
2790: for (i=1; i<size; i++) {
2791: maxnz = PetscMax(maxnz,procsnz[i]);
2792: }
2793: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2795: /* read in my part of the matrix column indices */
2796: nz = procsnz[0];
2797: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2798: mycols = ibuf;
2799: if (size == 1) nz -= extra_rows;
2800: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2801: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2803: /* read in everyone else's column indices (except the last process) and ship them off */
2804: for (i=1; i<size-1; i++) {
2805: nz = procsnz[i];
2806: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2807: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2808: }
2809: /* read in the stuff for the last proc */
2810: if (size != 1) {
2811: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2812: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2813: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2814: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2815: }
2816: PetscFree(cols);
2817: } else {
2818: /* determine buffer space needed for message */
2819: nz = 0;
2820: for (i=0; i<m; i++) {
2821: nz += locrowlens[i];
2822: }
2823: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2824: mycols = ibuf;
2825: /* receive message of column indices*/
2826: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2827: MPI_Get_count(&status,MPIU_INT,&maxnz);
2828: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2829: }
2830:
2831: /* loop over local rows, determining number of off diagonal entries */
2832: PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);
2833: PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);
2834: PetscMemzero(mask,Mbs*sizeof(PetscInt));
2835: PetscMemzero(masked1,Mbs*sizeof(PetscInt));
2836: PetscMemzero(masked2,Mbs*sizeof(PetscInt));
2837: rowcount = 0; nzcount = 0;
2838: for (i=0; i<mbs; i++) {
2839: dcount = 0;
2840: odcount = 0;
2841: for (j=0; j<bs; j++) {
2842: kmax = locrowlens[rowcount];
2843: for (k=0; k<kmax; k++) {
2844: tmp = mycols[nzcount++]/bs;
2845: if (!mask[tmp]) {
2846: mask[tmp] = 1;
2847: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2848: else masked1[dcount++] = tmp;
2849: }
2850: }
2851: rowcount++;
2852: }
2853:
2854: dlens[i] = dcount;
2855: odlens[i] = odcount;
2857: /* zero out the mask elements we set */
2858: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2859: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2860: }
2862: /* create our matrix */
2863: MatCreate(comm,&A);
2864: MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);
2865: MatSetType(A,type);
2866: MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);
2868: /* Why isn't this done by calling MatSetOption(A,MAT_COLUMNS_SORTED)? */
2869: MatSetOption(A,MAT_COLUMNS_SORTED);
2870:
2871: if (!rank) {
2872: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);
2873: /* read in my part of the matrix numerical values */
2874: nz = procsnz[0];
2875: vals = buf;
2876: mycols = ibuf;
2877: if (size == 1) nz -= extra_rows;
2878: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2879: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2881: /* insert into matrix */
2882: jj = rstart*bs;
2883: for (i=0; i<m; i++) {
2884: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2885: mycols += locrowlens[i];
2886: vals += locrowlens[i];
2887: jj++;
2888: }
2889: /* read in other processors (except the last one) and ship out */
2890: for (i=1; i<size-1; i++) {
2891: nz = procsnz[i];
2892: vals = buf;
2893: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2894: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2895: }
2896: /* the last proc */
2897: if (size != 1){
2898: nz = procsnz[i] - extra_rows;
2899: vals = buf;
2900: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2901: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2902: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
2903: }
2904: PetscFree(procsnz);
2905: } else {
2906: /* receive numeric values */
2907: PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);
2909: /* receive message of values*/
2910: vals = buf;
2911: mycols = ibuf;
2912: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2913: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2914: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2916: /* insert into matrix */
2917: jj = rstart*bs;
2918: for (i=0; i<m; i++) {
2919: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2920: mycols += locrowlens[i];
2921: vals += locrowlens[i];
2922: jj++;
2923: }
2924: }
2925: PetscFree(locrowlens);
2926: PetscFree(buf);
2927: PetscFree(ibuf);
2928: PetscFree2(rowners,browners);
2929: PetscFree2(dlens,odlens);
2930: PetscFree3(mask,masked1,masked2);
2931: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2932: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2934: *newmat = A;
2935: return(0);
2936: }
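/*
   A hypothetical fragment showing how this loader is normally reached: a binary
   viewer is opened and MatLoad() dispatches here for the MATMPIBAIJ type. The
   block size is taken from -matload_block_size (default 1) and rows are padded
   so the global size is divisible by it. The file name below is a placeholder.

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatLoad(viewer,MATMPIBAIJ,&A);
     PetscViewerDestroy(viewer);
*/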
2940: /*@
2941: MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2943: Input Parameters:
2944: . mat - the matrix
2945: . fact - factor
2947: Collective on Mat
2949: Level: advanced
2951: Notes:
2952: This can also be set by the command line option: -mat_use_hash_table <fact>
2954: .keywords: matrix, hashtable, factor, HT
2956: .seealso: MatSetOption()
2957: @*/
2958: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2959: {
2960: PetscErrorCode ierr,(*f)(Mat,PetscReal);
2963: PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);
2964: if (f) {
2965: (*f)(mat,fact);
2966: }
2967: return(0);
2968: }
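/*
   Usage sketch (hypothetical fragment): the hash-table assembly path is enabled
   as a matrix option, and the table allocated during assembly is sized using
   this factor (when set from the options database, factors of 1.0 or less are
   replaced by a default).

     Mat A;                                  // MATMPIBAIJ matrix, before assembly
     MatSetOption(A,MAT_USE_HASH_TABLE);
     MatMPIBAIJSetHashTableFactor(A,1.6);
*/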
2973: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2974: {
2975: Mat_MPIBAIJ *baij;
2978: baij = (Mat_MPIBAIJ*)mat->data;
2979: baij->ht_fact = fact;
2980: return(0);
2981: }
2986: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2987: {
2988: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2990: *Ad = a->A;
2991: *Ao = a->B;
2992: *colmap = a->garray;
2993: return(0);
2994: }
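/*
   Usage sketch (hypothetical fragment): direct, no-copy access to the two
   sequential pieces; colmap[i] gives the global block column corresponding to
   column i of the off-diagonal part.

     Mat      A;              // assembled MATMPIBAIJ matrix
     Mat      Ad,Ao;          // diagonal and off-diagonal SEQBAIJ parts
     PetscInt *colmap;
     MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
*/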