#define PETSCMAT_DLL #include "src/mat/impls/rowbs/mpi/mpirowbs.h" #define CHUNCKSIZE_LOCAL 10 #undef __FUNCT__ #define __FUNCT__ "MatFreeRowbs_Private" static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v) { PetscErrorCode ierr; PetscFunctionBegin; if (v) { #if defined(PETSC_USE_LOG) int len = -n*(sizeof(int)+sizeof(PetscScalar)); #endif ierr = PetscFree(v);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,len);CHKERRQ(ierr); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMallocRowbs_Private" static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v) { PetscErrorCode ierr; int len; PetscFunctionBegin; if (!n) { *i = 0; *v = 0; } else { len = n*(sizeof(int) + sizeof(PetscScalar)); ierr = PetscMalloc(len,v);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,len);CHKERRQ(ierr); *i = (int*)(*v + n); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatScale_MPIRowbs" PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data; BSspmat *A = a->A; BSsprow *vs; PetscScalar *ap; int i,m = inA->rmap.n,nrow,j; PetscErrorCode ierr; PetscFunctionBegin; for (i=0; irows[i]; nrow = vs->length; ap = vs->nz; for (j=0; jnz);CHKERRQ(ierr); PetscFunctionReturn(0); } /* ----------------------------------------------------------------- */ #undef __FUNCT__ #define __FUNCT__ "MatCreateMPIRowbs_local" static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[]) { Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data; PetscErrorCode ierr; int i,len,m = A->rmap.n,*tnnz; BSspmat *bsmat; BSsprow *vs; PetscFunctionBegin; ierr = PetscMalloc((m+1)*sizeof(int),&tnnz);CHKERRQ(ierr); if (!nnz) { if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; if (nz <= 0) nz = 1; for (i=0; iA);CHKERRQ(ierr); bsmat = bsif->A; BSset_mat_icc_storage(bsmat,PETSC_FALSE); BSset_mat_symmetric(bsmat,PETSC_FALSE); len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1; ierr = PetscMalloc(len,&bsmat->rows);CHKERRQ(ierr); bsmat->num_rows = m; bsmat->global_num_rows = A->rmap.N; bsmat->map = bsif->bsmap; vs = (BSsprow*)(bsmat->rows + m); for (i=0; irows[i] = vs; bsif->imax[i] = tnnz[i]; vs->diag_ind = -1; ierr = MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));CHKERRQ(ierr); /* put zero on diagonal */ /*vs->length = 1; vs->col[0] = i + bsif->rstart; vs->nz[0] = 0.0;*/ vs->length = 0; vs++; } ierr = PetscLogObjectMemory(A,sizeof(BSspmat) + len);CHKERRQ(ierr); bsif->nz = 0; bsif->maxnz = nz; bsif->sorted = 0; bsif->roworiented = PETSC_TRUE; bsif->nonew = 0; bsif->bs_color_single = 0; ierr = PetscFree(tnnz);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSetValues_MPIRowbs_local" static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) { Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data; BSspmat *A = mat->A; BSsprow *vs; PetscErrorCode ierr; int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax; int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted; PetscScalar *ap,value; PetscFunctionBegin; for (k=0; k= AA->rmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->rmap.n-1); vs = A->rows[row]; ap = vs->nz; rp = vs->col; rmax = imax[row]; nrow = vs->length; a = 0; for (l=0; l= AA->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->cmap.N-1); col = in[l]; value = *v++; if (!sorted) a = 0; b = nrow; while (b-a > 5) { t = (b+a)/2; if (rp[t] > col) b = t; else a = t; } for 
(i=a; i col) break; if (rp[i] == col) { if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value; goto noinsert; } } if (nonew) goto noinsert; if (nrow >= rmax) { /* there is no extra room in row, therefore enlarge */ int *itemp,*iout,*iin = vs->col; PetscScalar *vout,*vin = vs->nz,*vtemp; /* malloc new storage space */ imax[row] += CHUNCKSIZE_LOCAL; ierr = MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);CHKERRQ(ierr); vout = vtemp; iout = itemp; for (ii=0; ii 0) { ierr = MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);CHKERRQ(ierr); } vs->col = iout; vs->nz = vout; rmax = imax[row]; mat->maxnz += CHUNCKSIZE_LOCAL; mat->reallocs++; } else { /* shift higher columns over to make room for newie */ for (ii=nrow-1; ii>=i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; } rp[i] = col; ap[i] = value; } nrow++; mat->nz++; AA->same_nonzero = PETSC_FALSE; noinsert:; a = i + 1; } vs->length = nrow; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatAssemblyBegin_MPIRowbs_local" static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode) { PetscFunctionBegin; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatAssemblyEnd_MPIRowbs_local" static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data; BSspmat *A = a->A; BSsprow *vs; int i,j,rstart = AA->rmap.rstart; PetscFunctionBegin; if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); /* Mark location of diagonal */ for (i=0; irmap.n; i++) { vs = A->rows[i]; for (j=0; jlength; j++) { if (vs->col[j] == i + rstart) { vs->diag_ind = j; break; } } if (vs->diag_ind == -1) { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry"); } } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatZeroRows_MPIRowbs_local" static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data; BSspmat *l = a->A; PetscErrorCode ierr; int i,m = A->rmap.n - 1,col,base=A->rmap.rstart; PetscFunctionBegin; if (a->keepzeroedrows) { for (i=0; i m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range"); ierr = PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));CHKERRQ(ierr); if (diag != 0.0) { col=rz[i]+base; ierr = MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);CHKERRQ(ierr); } } } else { if (diag != 0.0) { for (i=0; i m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range"); if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */ l->rows[rz[i]]->length = 1; l->rows[rz[i]]->nz[0] = diag; l->rows[rz[i]]->col[0] = A->rmap.rstart + rz[i]; } else { col=rz[i]+base; ierr = MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);CHKERRQ(ierr); } } } else { for (i=0; i m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range"); l->rows[rz[i]]->length = 0; } } A->same_nonzero = PETSC_FALSE; } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatNorm_MPIRowbs_local" static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm) { Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data; BSsprow *vs,**rs; PetscScalar *xv; PetscReal sum = 0.0; PetscErrorCode ierr; int *xi,nz,i,j; PetscFunctionBegin; rs = mat->A->rows; if (type == NORM_FROBENIUS) { for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xv = vs->nz; while (nz--) { #if defined(PETSC_USE_COMPLEX) sum += 
PetscRealPart(PetscConj(*xv)*(*xv)); xv++; #else sum += (*xv)*(*xv); xv++; #endif } } *norm = sqrt(sum); } else if (type == NORM_1) { /* max column norm */ PetscReal *tmp; ierr = PetscMalloc(A->cmap.n*sizeof(PetscReal),&tmp);CHKERRQ(ierr); ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr); *norm = 0.0; for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xi = vs->col; xv = vs->nz; while (nz--) { tmp[*xi] += PetscAbsScalar(*xv); xi++; xv++; } } for (j=0; jrmap.n; j++) { if (tmp[j] > *norm) *norm = tmp[j]; } ierr = PetscFree(tmp);CHKERRQ(ierr); } else if (type == NORM_INFINITY) { /* max row norm */ *norm = 0.0; for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xv = vs->nz; sum = 0.0; while (nz--) { sum += PetscAbsScalar(*xv); xv++; } if (sum > *norm) *norm = sum; } } else { SETERRQ(PETSC_ERR_SUP,"No support for the two norm"); } PetscFunctionReturn(0); } /* ----------------------------------------------------------------- */ #undef __FUNCT__ #define __FUNCT__ "MatSetValues_MPIRowbs" PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; int i,j,row,col,rstart = mat->rmap.rstart,rend = mat->rmap.rend; PetscTruth roworiented = a->roworiented; PetscFunctionBegin; /* Note: There's no need to "unscale" the matrix, since scaling is confined to a->pA, and we're working with a->A here */ for (i=0; i= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->rmap.N-1); if (im[i] >= rstart && im[i] < rend) { row = im[i] - rstart; for (j=0; j= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->cmap.N-1); if (in[j] >= 0 && in[j] < mat->cmap.N){ col = in[j]; if (roworiented) { ierr = MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);CHKERRQ(ierr); } else { ierr = MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);CHKERRQ(ierr); } } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");} } } else { if (!a->donotstash) { if (roworiented) { ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); } else { ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); } } } } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatAssemblyBegin_MPIRowbs" PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode) { MPI_Comm comm = mat->comm; PetscErrorCode ierr; int nstash,reallocs; InsertMode addv; PetscFunctionBegin; /* Note: There's no need to "unscale" the matrix, since scaling is confined to a->pA, and we're working with a->A here */ /* make sure all processors are either in INSERTMODE or ADDMODE */ ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added"); } mat->insertmode = addv; /* in case this processor had no cache */ ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); ierr = PetscInfo2(0,"Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatView_MPIRowbs_ASCII" static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; int i,j; PetscTruth iascii; BSspmat *A = a->A; BSsprow **rs = A->rows; 
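/*
   Illustrative sketch (not part of the original source): MatSetValues_MPIRowbs_local
   above keeps every row's column indices sorted, locates the insertion point with a
   coarse binary search finished by a linear scan, and grows a full row by
   CHUNCKSIZE_LOCAL extra slots.  The standalone helper below shows the same
   insert-or-update logic on a plain C row; SketchRow, SketchRowInsert and
   SKETCH_CHUNK are hypothetical names, not the PETSc or BlockSolve95 API.
*/
#include <stdlib.h>
#include <string.h>

typedef struct {
  int    *col;   /* sorted column indices      */
  double *val;   /* values, same order as col  */
  int     len;   /* nonzeros currently stored  */
  int     max;   /* allocated slots            */
} SketchRow;

#define SKETCH_CHUNK 10

static int SketchRowInsert(SketchRow *r,int c,double v,int add)
{
  int    lo = 0,hi = r->len,i,*nc;
  double *nv;

  while (hi - lo > 5) {                       /* coarse binary search ...         */
    int t = (lo + hi)/2;
    if (r->col[t] > c) hi = t; else lo = t;
  }
  for (i=lo; i<r->len; i++) {                 /* ... finished by a linear scan    */
    if (r->col[i] == c) {                     /* existing entry: add or overwrite */
      if (add) r->val[i] += v; else r->val[i] = v;
      return 0;
    }
    if (r->col[i] > c) break;                 /* i is the insertion point         */
  }
  if (r->len == r->max) {                     /* row is full: grow it by a chunk  */
    nc = (int*)realloc(r->col,(r->max + SKETCH_CHUNK)*sizeof(int));
    if (!nc) return 1;
    r->col = nc;
    nv = (double*)realloc(r->val,(r->max + SKETCH_CHUNK)*sizeof(double));
    if (!nv) return 1;
    r->val  = nv;
    r->max += SKETCH_CHUNK;
  }
  memmove(r->col + i + 1,r->col + i,(r->len - i)*sizeof(int));    /* shift tail up */
  memmove(r->val + i + 1,r->val + i,(r->len - i)*sizeof(double));
  r->col[i] = c;
  r->val[i] = v;
  r->len++;
  return 0;
}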
PetscViewerFormat format; PetscFunctionBegin; ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { int ind_l,ind_g,clq_l,clq_g,color; ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0); ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0); clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0); clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0); color = BSnum_colors(a->pA);CHKERRBS(0); ierr = PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l); } else if (format == PETSC_VIEWER_ASCII_COMMON) { for (i=0; inum_rows; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);CHKERRQ(ierr); for (j=0; jlength; j++) { if (rs[i]->nz[j]) {ierr = PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);CHKERRQ(ierr);} } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } } else if (format == PETSC_VIEWER_ASCII_MATLAB) { SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); } else { ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); for (i=0; inum_rows; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);CHKERRQ(ierr); for (j=0; jlength; j++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);CHKERRQ(ierr); } ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatView_MPIRowbs_Binary" static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt i,M,m,*sbuff,*rowlengths; PetscInt *recvcts,*recvdisp,fd,*cols,maxnz,nz,j; BSspmat *A = a->A; BSsprow **rs = A->rows; MPI_Comm comm = mat->comm; MPI_Status status; PetscScalar *vals; MatInfo info; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); M = mat->rmap.N; m = mat->rmap.n; /* First gather together on the first processor the lengths of each row, and write them out to the file */ ierr = PetscMalloc(m*sizeof(int),&sbuff);CHKERRQ(ierr); for (i=0; inum_rows; i++) { sbuff[i] = rs[i]->length; } ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); if (!rank) { ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); ierr = PetscMalloc((4+M)*sizeof(int),&rowlengths);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(int),&recvcts);CHKERRQ(ierr); recvdisp = mat->rmap.range; for (i=0; icookie; rowlengths[1] = mat->rmap.N; rowlengths[2] = mat->cmap.N; rowlengths[3] = (int)info.nz_used; ierr = MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);CHKERRQ(ierr); ierr = PetscFree(sbuff);CHKERRQ(ierr); ierr = PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); /* count the number of nonzeros on each processor */ ierr = PetscMemzero(recvcts,size*sizeof(int));CHKERRQ(ierr); for (i=0; inum_rows; i++) { for (j=0; jlength; j++) { cols[nz++] = rs[i]->col[j]; } } ierr = PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); /* receive and store column indices 
for all other processors */ for (i=1; itag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPI_INT,&nz);CHKERRQ(ierr); ierr = PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); } ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); /* binary store values for 0th processor */ nz = 0; for (i=0; inum_rows; i++) { for (j=0; jlength; j++) { vals[nz++] = rs[i]->nz[j]; } } ierr = PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); /* receive and store nonzeros for all other processors */ for (i=1; itag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_SCALAR,&nz);CHKERRQ(ierr); ierr = PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); } ierr = PetscFree(vals);CHKERRQ(ierr); } else { ierr = MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);CHKERRQ(ierr); ierr = PetscFree(sbuff);CHKERRQ(ierr); /* count local nonzeros */ nz = 0; for (i=0; inum_rows; i++) { for (j=0; jlength; j++) { nz++; } } /* copy into buffer column indices */ ierr = PetscMalloc(nz*sizeof(int),&cols);CHKERRQ(ierr); nz = 0; for (i=0; inum_rows; i++) { for (j=0; jlength; j++) { cols[nz++] = rs[i]->col[j]; } } /* send */ /* should wait until processor zero tells me to go */ ierr = MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);CHKERRQ(ierr); ierr = PetscFree(cols);CHKERRQ(ierr); /* copy into buffer column values */ ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); nz = 0; for (i=0; inum_rows; i++) { for (j=0; jlength; j++) { vals[nz++] = rs[i]->nz[j]; } } /* send */ /* should wait until processor zero tells me to go */ ierr = MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);CHKERRQ(ierr); ierr = PetscFree(vals);CHKERRQ(ierr); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatView_MPIRowbs" PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer) { Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; PetscTruth iascii,isbinary; PetscFunctionBegin; ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); if (!bsif->blocksolveassembly) { ierr = MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);CHKERRQ(ierr); } if (iascii) { ierr = MatView_MPIRowbs_ASCII(mat,viewer);CHKERRQ(ierr); } else if (isbinary) { ierr = MatView_MPIRowbs_Binary(mat,viewer);CHKERRQ(ierr); } else { SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatAssemblyEnd_MPIRowbs_MakeSymmetric" static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; BSspmat *A = a->A; BSsprow *vs; int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs; int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row; PetscErrorCode ierr; int ctr_j,*sbuf1_j,k; PetscScalar val=0.0; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1; MPI_Status *s_status,*r_status; PetscFunctionBegin; comm = mat->comm; tag = mat->tag; size = a->size; rank = a->rank; M = mat->rmap.N; rstart = mat->rmap.rstart; ierr = PetscMalloc(M*sizeof(int),&rtable);CHKERRQ(ierr); /* Create hash table for the mapping :row -> proc */ for (i=0,j=0; irmap.range[i+1]; for (; jrmap.n; i++) { ierr = PetscMemzero(w4,size*sizeof(int));CHKERRQ(ierr); /* initialize work vector */ vs = A->rows[i]; for (j=0; jlength; j++) { proc = rtable[vs->col[j]]; w4[proc]++; } for (j=0; 
jrmap.n; i++) { ierr = PetscMemzero(ctr,size*sizeof(int));CHKERRQ(ierr); vs = A->rows[i]; for (j=0; jlength; j++) { col = vs->col[j]; proc = rtable[col]; if (proc != rank) { /* copy to the outgoing buffer */ ctr[proc]++; *ptr[proc] = col; ptr[proc]++; } else { row = col - rstart; col = i + rstart; ierr = MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); } } /* Update the headers for the current row */ for (j=0; jdata; PetscErrorCode ierr; int ldim,low,high,i; PetscScalar *diag; PetscFunctionBegin; if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */ if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);} if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);} } if ((!mat->same_nonzero) || (!mat->was_assembled)) { /* Indicates bypassing cliques in coloring */ if (a->bs_color_single) { BSctx_set_si(a->procinfo,100); } /* Form permuted matrix for efficient parallel execution */ a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0); /* Set up the communication */ a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0); } else { /* Repermute the matrix */ BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0); } /* Symmetrically scale the matrix by the diagonal */ BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0); /* Store inverse of square root of permuted diagonal scaling matrix */ ierr = VecGetLocalSize(a->diag,&ldim);CHKERRQ(ierr); ierr = VecGetOwnershipRange(a->diag,&low,&high);CHKERRQ(ierr); ierr = VecGetArray(a->diag,&diag);CHKERRQ(ierr); for (i=0; ipA->scale_diag[i] != 0.0) { diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i])); } else { diag[i] = 1.0; } } ierr = VecRestoreArray(a->diag,&diag);CHKERRQ(ierr); a->assembled_icc_storage = a->A->icc_storage; a->blocksolveassembly = 1; mat->was_assembled = PETSC_TRUE; mat->same_nonzero = PETSC_TRUE; ierr = PetscInfo(mat,"Completed BlockSolve95 matrix assembly\n");CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatAssemblyEnd_MPIRowbs" PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; int i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols; PetscScalar *vals,val; InsertMode addv = mat->insertmode; PetscFunctionBegin; while (1) { ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);CHKERRQ(ierr); if (!flg) break; for (i=0; istash);CHKERRQ(ierr); rstart = mat->rmap.rstart; nzcount = a->nz; /* This is the number of nonzeros entered by the user */ /* BlockSolve requires that the matrix is structurally symmetric */ if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) { ierr = MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);CHKERRQ(ierr); } /* BlockSolve requires that all the diagonal elements are set */ val = 0.0; for (i=0; irmap.n; i++) { row = i; col = i + rstart; ierr = MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin_MPIRowbs_local(mat,mode);CHKERRQ(ierr); ierr = MatAssemblyEnd_MPIRowbs_local(mat,mode);CHKERRQ(ierr); a->blocksolveassembly = 0; ierr = PetscInfo4(mat,"Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->rmap.n,mat->cmap.n,a->maxnz-a->nz,a->nz);CHKERRQ(ierr); ierr = PetscInfo2(mat,"User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);CHKERRQ(ierr); ierr = PetscInfo1(mat,"Number of mallocs during MatSetValues is %d\n",a->reallocs);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatZeroEntries_MPIRowbs" PetscErrorCode 
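/*
   Illustrative sketch (not part of the original source): before handing the matrix
   to BlockSolve95, MatAssemblyEnd_MPIRowbs_ForBlockSolve above symmetrically scales
   the permuted matrix by its diagonal and stores the scaling vector with entries
   1/sqrt(|a_ii|) in a->diag.  The standalone helper below shows just that scaling
   vector; SketchBuildDiagScaling is a hypothetical name.
*/
#include <math.h>

static void SketchBuildDiagScaling(int n,const double *d,double *s)
{
  int i;
  for (i=0; i<n; i++) {
    if (d[i] != 0.0) s[i] = 1.0/sqrt(fabs(d[i]));   /* D^{-1/2} entry            */
    else             s[i] = 1.0;                    /* guard against zero pivots */
  }
}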
MatZeroEntries_MPIRowbs(Mat mat) { Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data; BSspmat *A = l->A; BSsprow *vs; int i,j; PetscFunctionBegin; for (i=0; i rmap.n; i++) { vs = A->rows[i]; for (j=0; j< vs->length; j++) vs->nz[j] = 0.0; } PetscFunctionReturn(0); } /* the code does not do the diagonal entries correctly unless the matrix is square and the column and row owerships are identical. This is a BUG. */ #undef __FUNCT__ #define __FUNCT__ "MatZeroRows_MPIRowbs" PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) { Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data; PetscErrorCode ierr; int i,*owners = A->rmap.range,size = l->size; int *nprocs,j,idx,nsends; int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; int *rvalues,tag = A->tag,count,base,slen,n,*source; int *lens,imdex,*lrows,*values; MPI_Comm comm = A->comm; MPI_Request *send_waits,*recv_waits; MPI_Status recv_status,*send_status; PetscTruth found; PetscFunctionBegin; /* first count number of contributors to each processor */ ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ for (i=0; i= owners[j] && idx < owners[j+1]) { nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; } } if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); } nsends = 0; for (i=0; idata; BSsprow *vs,**rs; PetscScalar *xv; PetscReal sum = 0.0; PetscErrorCode ierr; int *xi,nz,i,j; PetscFunctionBegin; if (a->size == 1) { ierr = MatNorm_MPIRowbs_local(mat,type,norm);CHKERRQ(ierr); } else { rs = a->A->rows; if (type == NORM_FROBENIUS) { for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xv = vs->nz; while (nz--) { #if defined(PETSC_USE_COMPLEX) sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++; #else sum += (*xv)*(*xv); xv++; #endif } } ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); *norm = sqrt(*norm); } else if (type == NORM_1) { /* max column norm */ PetscReal *tmp,*tmp2; ierr = PetscMalloc(mat->cmap.n*sizeof(PetscReal),&tmp);CHKERRQ(ierr); ierr = PetscMalloc(mat->cmap.n*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); ierr = PetscMemzero(tmp,mat->cmap.n*sizeof(PetscReal));CHKERRQ(ierr); *norm = 0.0; for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xi = vs->col; xv = vs->nz; while (nz--) { tmp[*xi] += PetscAbsScalar(*xv); xi++; xv++; } } ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); for (j=0; jcmap.n; j++) { if (tmp2[j] > *norm) *norm = tmp2[j]; } ierr = PetscFree(tmp);CHKERRQ(ierr); ierr = PetscFree(tmp2);CHKERRQ(ierr); } else if (type == NORM_INFINITY) { /* max row norm */ PetscReal ntemp = 0.0; for (i=0; irmap.n; i++) { vs = *rs++; nz = vs->length; xv = vs->nz; sum = 0.0; while (nz--) { sum += PetscAbsScalar(*xv); xv++; } if (sum > ntemp) ntemp = sum; } ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); } else { SETERRQ(PETSC_ERR_SUP,"No support for two norm"); } } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_MPIRowbs" PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy) { Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data; BSprocinfo *bspinfo = bsif->procinfo; PetscScalar *xxa,*xworka,*yya; PetscErrorCode ierr; PetscFunctionBegin; if (!bsif->blocksolveassembly) { ierr = MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);CHKERRQ(ierr); } /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */ if (!bsif->vecs_permscale) { ierr 
= VecGetArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecGetArray(xx,&xxa);CHKERRQ(ierr); BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0); ierr = VecRestoreArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&xxa);CHKERRQ(ierr); ierr = VecPointwiseDivide(xx,bsif->xwork,bsif->diag);CHKERRQ(ierr); } ierr = VecGetArray(xx,&xxa);CHKERRQ(ierr); ierr = VecGetArray(yy,&yya);CHKERRQ(ierr); /* Do lower triangular multiplication: [ y = L * xwork ] */ if (bspinfo->single) { BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0); } else { BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0); } /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */ if (mat->symmetric) { if (bspinfo->single){ BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0); } else { BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0); } } /* not needed for ILU version since forward does it all */ ierr = VecRestoreArray(xx,&xxa);CHKERRQ(ierr); ierr = VecRestoreArray(yy,&yya);CHKERRQ(ierr); /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */ if (!bsif->vecs_permscale) { ierr = VecGetArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecGetArray(xx,&xxa);CHKERRQ(ierr); BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0); ierr = VecRestoreArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecRestoreArray(xx,&xxa);CHKERRQ(ierr); ierr = VecPointwiseDivide(bsif->xwork,yy,bsif->diag);CHKERRQ(ierr); ierr = VecGetArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecGetArray(yy,&yya);CHKERRQ(ierr); BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0); ierr = VecRestoreArray(bsif->xwork,&xworka);CHKERRQ(ierr); ierr = VecRestoreArray(yy,&yya);CHKERRQ(ierr); } ierr = PetscLogFlops(2*bsif->nz - mat->cmap.n);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_MPIRowbs" PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz) { PetscErrorCode ierr; PetscScalar one = 1.0; PetscFunctionBegin; ierr = (*mat->ops->mult)(mat,xx,zz);CHKERRQ(ierr); ierr = VecAXPY(zz,one,yy);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetInfo_MPIRowbs" PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info) { Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data; PetscReal isend[5],irecv[5]; PetscErrorCode ierr; PetscFunctionBegin; info->rows_global = (double)A->rmap.N; info->columns_global = (double)A->cmap.N; info->rows_local = (double)A->cmap.n; info->columns_local = (double)A->rmap.n; info->block_size = 1.0; info->mallocs = (double)mat->reallocs; isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz; isend[3] = A->mem; isend[4] = info->mallocs; if (flag == MAT_LOCAL) { info->nz_used = isend[0]; info->nz_allocated = isend[1]; info->nz_unneeded = isend[2]; info->memory = isend[3]; info->mallocs = isend[4]; } else if (flag == MAT_GLOBAL_MAX) { ierr = MPI_Allreduce(isend,irecv,3,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); info->nz_used = irecv[0]; info->nz_allocated = irecv[1]; info->nz_unneeded = irecv[2]; info->memory = irecv[3]; info->mallocs = irecv[4]; } else if (flag == MAT_GLOBAL_SUM) { ierr = MPI_Allreduce(isend,irecv,3,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); info->nz_used = irecv[0]; info->nz_allocated = irecv[1]; info->nz_unneeded = irecv[2]; info->memory = irecv[3]; info->mallocs = irecv[4]; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetDiagonal_MPIRowbs" PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; 
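/*
   Illustrative sketch (not part of the original source): the parallel norms in
   MatNorm_MPIRowbs above accumulate a per-process partial result and then combine
   it with MPI_Allreduce.  The helper below shows the Frobenius case for a generic
   row-stored local block; SketchFrobeniusNorm, nloc, rowlen and rowval are
   hypothetical names.
*/
#include <math.h>
#include <mpi.h>

static double SketchFrobeniusNorm(int nloc,const int *rowlen,double *const *rowval,MPI_Comm comm)
{
  double lsum = 0.0,gsum = 0.0;
  int    i,j;

  for (i=0; i<nloc; i++) {                    /* sum of squares of the local entries */
    for (j=0; j<rowlen[i]; j++) lsum += rowval[i][j]*rowval[i][j];
  }
  MPI_Allreduce(&lsum,&gsum,1,MPI_DOUBLE,MPI_SUM,comm);   /* global sum over all ranks */
  return sqrt(gsum);                          /* ||A||_F = sqrt(sum_ij a_ij^2) */
}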
BSsprow **rs = a->A->rows; PetscErrorCode ierr; int i,n; PetscScalar *x,zero = 0.0; PetscFunctionBegin; if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); if (!a->blocksolveassembly) { ierr = MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);CHKERRQ(ierr); } ierr = VecSet(v,zero);CHKERRQ(ierr); ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); if (n != mat->rmap.n) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); ierr = VecGetArray(v,&x);CHKERRQ(ierr); for (i=0; irmap.n; i++) { x[i] = rs[i]->nz[rs[i]->diag_ind]; } ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatDestroy_MPIRowbs" PetscErrorCode MatDestroy_MPIRowbs(Mat mat) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; BSspmat *A = a->A; BSsprow *vs; PetscErrorCode ierr; int i; PetscFunctionBegin; #if defined(PETSC_USE_LOG) PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap.N,mat->cmap.N); #endif ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); if (a->bsmap) { ierr = PetscFree(a->bsmap->vlocal2global);CHKERRQ(ierr); ierr = PetscFree(a->bsmap->vglobal2local);CHKERRQ(ierr); if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc); ierr = PetscFree(a->bsmap);CHKERRQ(ierr); } if (A) { for (i=0; irmap.n; i++) { vs = A->rows[i]; ierr = MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);CHKERRQ(ierr); } /* Note: A->map = a->bsmap is freed above */ ierr = PetscFree(A->rows);CHKERRQ(ierr); ierr = PetscFree(A);CHKERRQ(ierr); } if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);} if (a->diag) {ierr = VecDestroy(a->diag);CHKERRQ(ierr);} if (a->xwork) {ierr = VecDestroy(a->xwork);CHKERRQ(ierr);} if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);} if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);} if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);} if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);} ierr = PetscFree(a->imax);CHKERRQ(ierr); ierr = MPI_Comm_free(&(a->comm_mpirowbs));CHKERRQ(ierr); ierr = PetscFree(a);CHKERRQ(ierr); ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatSetOption_MPIRowbs" PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data; PetscErrorCode ierr; PetscFunctionBegin; switch (op) { case MAT_ROW_ORIENTED: a->roworiented = PETSC_TRUE; break; case MAT_COLUMN_ORIENTED: a->roworiented = PETSC_FALSE; break; case MAT_COLUMNS_SORTED: a->sorted = 1; break; case MAT_COLUMNS_UNSORTED: a->sorted = 0; break; case MAT_NO_NEW_NONZERO_LOCATIONS: a->nonew = 1; break; case MAT_YES_NEW_NONZERO_LOCATIONS: a->nonew = 0; break; case MAT_DO_NOT_USE_INODES: a->bs_color_single = 1; break; case MAT_YES_NEW_DIAGONALS: case MAT_ROWS_SORTED: case MAT_NEW_NONZERO_LOCATION_ERR: case MAT_NEW_NONZERO_ALLOCATION_ERR: case MAT_ROWS_UNSORTED: case MAT_USE_HASH_TABLE: ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); break; case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = PETSC_TRUE; break; case MAT_NO_NEW_DIAGONALS: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); break; case MAT_KEEP_ZEROED_ROWS: a->keepzeroedrows = PETSC_TRUE; break; case MAT_SYMMETRIC: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0); break; case MAT_STRUCTURALLY_SYMMETRIC: case MAT_NOT_SYMMETRIC: case MAT_NOT_STRUCTURALLY_SYMMETRIC: case MAT_HERMITIAN: case MAT_NOT_HERMITIAN: case MAT_SYMMETRY_ETERNAL: 
case MAT_NOT_SYMMETRY_ETERNAL: ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); break; default: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); break; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetRow_MPIRowbs" PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v) { Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data; BSspmat *A = mat->A; BSsprow *rs; PetscFunctionBegin; if (row < AA->rmap.rstart || row >= AA->rmap.rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); rs = A->rows[row - AA->rmap.rstart]; *nz = rs->length; if (v) *v = rs->nz; if (idx) *idx = rs->col; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatRestoreRow_MPIRowbs" PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v) { PetscFunctionBegin; PetscFunctionReturn(0); } /* ------------------------------------------------------------------ */ #undef __FUNCT__ #define __FUNCT__ "MatSetUpPreallocation_MPIRowbs" PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A) { PetscErrorCode ierr; PetscFunctionBegin; ierr = MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); PetscFunctionReturn(0); } /* -------------------------------------------------------------------*/ static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs, MatGetRow_MPIRowbs, MatRestoreRow_MPIRowbs, MatMult_MPIRowbs, /* 4*/ MatMultAdd_MPIRowbs, MatMult_MPIRowbs, MatMultAdd_MPIRowbs, MatSolve_MPIRowbs, 0, 0, /*10*/ 0, 0, 0, 0, 0, /*15*/ MatGetInfo_MPIRowbs, 0, MatGetDiagonal_MPIRowbs, 0, MatNorm_MPIRowbs, /*20*/ MatAssemblyBegin_MPIRowbs, MatAssemblyEnd_MPIRowbs, 0, MatSetOption_MPIRowbs, MatZeroEntries_MPIRowbs, /*25*/ MatZeroRows_MPIRowbs, 0, MatLUFactorNumeric_MPIRowbs, 0, MatCholeskyFactorNumeric_MPIRowbs, /*30*/ MatSetUpPreallocation_MPIRowbs, MatILUFactorSymbolic_MPIRowbs, MatIncompleteCholeskyFactorSymbolic_MPIRowbs, 0, 0, /*35*/ 0, MatForwardSolve_MPIRowbs, MatBackwardSolve_MPIRowbs, 0, 0, /*40*/ 0, MatGetSubMatrices_MPIRowbs, 0, 0, 0, /*45*/ 0, MatScale_MPIRowbs, 0, 0, 0, /*50*/ 0, 0, 0, 0, 0, /*55*/ 0, 0, 0, 0, 0, /*60*/ MatGetSubMatrix_MPIRowbs, MatDestroy_MPIRowbs, MatView_MPIRowbs, 0, MatUseScaledForm_MPIRowbs, /*65*/ MatScaleSystem_MPIRowbs, MatUnScaleSystem_MPIRowbs, 0, 0, 0, /*70*/ 0, 0, 0, 0, 0, /*75*/ 0, 0, 0, 0, 0, /*80*/ 0, 0, 0, 0, MatLoad_MPIRowbs, /*85*/ 0, 0, 0, 0, 0, /*90*/ 0, 0, 0, 0, 0, /*95*/ 0, 0, 0, 0}; /* ------------------------------------------------------------------- */ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "MatMPIRowbsSetPreallocation_MPIRowbs" PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[]) { PetscErrorCode ierr; PetscFunctionBegin; mat->preallocated = PETSC_TRUE; ierr = MatCreateMPIRowbs_local(mat,nz,nnz);CHKERRQ(ierr); PetscFunctionReturn(0); } EXTERN_C_END /*MC MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions on how to declare the existence of external packages), a matrix type can be constructed which invokes BlockSolve95 preconditioners and solvers. Options Database Keys: . 
-mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions() Level: beginner .seealso: MatCreateMPIRowbs M*/ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "MatCreate_MPIRowbs" PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIRowbs(Mat A) { Mat_MPIRowbs *a; BSmapping *bsmap; BSoff_map *bsoff; PetscErrorCode ierr; int *offset,m,M; PetscTruth flg1,flg3; BSprocinfo *bspinfo; MPI_Comm comm; PetscFunctionBegin; comm = A->comm; ierr = PetscNew(Mat_MPIRowbs,&a);CHKERRQ(ierr); A->data = (void*)a; ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); A->factor = 0; A->mapping = 0; a->vecs_permscale = PETSC_FALSE; A->insertmode = NOT_SET_VALUES; a->blocksolveassembly = 0; a->keepzeroedrows = PETSC_FALSE; ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr); ierr = PetscMapInitialize(comm,&A->rmap);CHKERRQ(ierr); ierr = PetscMapInitialize(comm,&A->cmap);CHKERRQ(ierr); m = A->rmap.n; M = A->rmap.N; ierr = PetscMalloc((A->rmap.n+1)*sizeof(int),&a->imax);CHKERRQ(ierr); a->reallocs = 0; /* build cache for off array entries formed */ ierr = MatStashCreate_Private(A->comm,1,&A->stash);CHKERRQ(ierr); a->donotstash = PETSC_FALSE; /* Initialize BlockSolve information */ a->A = 0; a->pA = 0; a->comm_pA = 0; a->fpA = 0; a->comm_fpA = 0; a->alpha = 1.0; a->ierr = 0; a->failures = 0; ierr = MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));CHKERRQ(ierr); ierr = VecCreateMPI(A->comm,A->rmap.n,A->rmap.N,&(a->diag));CHKERRQ(ierr); ierr = VecDuplicate(a->diag,&(a->xwork));CHKERRQ(ierr); ierr = PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,(A->rmap.n+1)*sizeof(PetscScalar));CHKERRQ(ierr); bspinfo = BScreate_ctx();CHKERRBS(0); a->procinfo = bspinfo; BSctx_set_id(bspinfo,a->rank);CHKERRBS(0); BSctx_set_np(bspinfo,a->size);CHKERRBS(0); BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0); BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0); BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0); BSctx_set_ct(bspinfo,IDO);CHKERRBS(0); #if defined(PETSC_USE_DEBUG) BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */ #endif BSctx_set_rt(bspinfo,1);CHKERRBS(0); #if defined (PETSC_USE_INFO) ierr = PetscOptionsHasName(PETSC_NULL,"-info",&flg1);CHKERRQ(ierr); if (flg1) { BSctx_set_pr(bspinfo,1);CHKERRBS(0); } #endif ierr = PetscOptionsBegin(A->comm,PETSC_NULL,"Options for MPIROWBS matrix","Mat");CHKERRQ(ierr); ierr = PetscOptionsTruth("-pc_factor_factorpointwise","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg1,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsTruth("-mat_rowbs_no_inode","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg3,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd();CHKERRQ(ierr); if (flg1 || flg3) { BSctx_set_si(bspinfo,1);CHKERRBS(0); } else { BSctx_set_si(bspinfo,0);CHKERRBS(0); } #if defined(PETSC_USE_LOG) MLOG_INIT(); /* Initialize logging */ #endif /* Compute global offsets */ offset = &A->rmap.rstart; ierr = PetscNew(BSmapping,&a->bsmap);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,sizeof(BSmapping));CHKERRQ(ierr); bsmap = a->bsmap; ierr = PetscMalloc(sizeof(int),&bsmap->vlocal2global);CHKERRQ(ierr); *((int*)bsmap->vlocal2global) = (*offset); bsmap->flocal2global = BSloc2glob; bsmap->free_l2g = 0; ierr = PetscMalloc(sizeof(int),&bsmap->vglobal2local);CHKERRQ(ierr); *((int*)bsmap->vglobal2local) = (*offset); bsmap->fglobal2local = BSglob2loc; bsmap->free_g2l = 0; bsoff = BSmake_off_map(*offset,bspinfo,A->rmap.N); 
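/*
   Illustrative sketch (not part of the original source): the BSmapping built in
   MatCreate_MPIRowbs above reduces to "global = local + rstart" for the contiguous
   row distribution, with the owner of an arbitrary global index found from the
   ownership range array (range[p] <= g < range[p+1]).  SketchLocalToGlobal and
   SketchFindOwner are hypothetical helpers.
*/
static int SketchLocalToGlobal(int local,int rstart)
{
  return local + rstart;                 /* contiguous block of rows per process */
}

static int SketchFindOwner(int g,int size,const int *range)
{
  int p;
  for (p=0; p<size; p++) {               /* a binary search works equally well here */
    if (g >= range[p] && g < range[p+1]) return p;
  }
  return -1;                             /* g is out of range */
}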
bsmap->vglobal2proc = (void*)bsoff; bsmap->fglobal2proc = BSglob2proc; bsmap->free_g2p = (void(*)(void*)) BSfree_off_map; ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C", "MatMPIRowbsSetPreallocation_MPIRowbs", MatMPIRowbsSetPreallocation_MPIRowbs);CHKERRQ(ierr); ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIROWBS);CHKERRQ(ierr); PetscFunctionReturn(0); } EXTERN_C_END #undef __FUNCT__ #define __FUNCT__ "MatMPIRowbsSetPreallocation" /* @ MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros per row in the matrix. Input Parameter: + mat - matrix . nz - maximum expected for any row - nzz - number expected in each row Note: This routine is valid only for matrices stored in the MATMPIROWBS format. @ */ PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[]) { PetscErrorCode ierr,(*f)(Mat,int,const int[]); PetscFunctionBegin; ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); if (f) { ierr = (*f)(mat,nz,nnz);CHKERRQ(ierr); } PetscFunctionReturn(0); } /* --------------- extra BlockSolve-specific routines -------------- */ #undef __FUNCT__ #define __FUNCT__ "MatGetBSProcinfo" /* @ MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the user can then manipulate to alter the default parameters. Input Parameter: mat - matrix Output Parameter: procinfo - processor information context Note: This routine is valid only for matrices stored in the MATMPIROWBS format. @ */ PetscErrorCode PETSCMAT_DLLEXPORT MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo) { Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data; PetscTruth ismpirowbs; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);CHKERRQ(ierr); if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type"); procinfo = a->procinfo; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatLoad_MPIRowbs" PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,MatType type,Mat *newmat) { Mat_MPIRowbs *a; BSspmat *A; BSsprow **rs; Mat mat; PetscErrorCode ierr; int i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz; int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols; PetscScalar *vals; MPI_Comm comm = ((PetscObject)viewer)->comm; MPI_Status status; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (!rank) { ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object"); if (header[3] < 0) { SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIRowbs"); } } ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); M = header[1]; /* determine ownership of all rows */ m = M/size + ((M % size) > rank); ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); rowners[0] = 0; for (i=2; i<=size; i++) { rowners[i] += rowners[i-1]; } rstart = rowners[rank]; rend = rowners[rank+1]; /* distribute row lengths to all processors */ ierr = PetscMalloc((rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); if (!rank) { ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 
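/*
   Illustrative sketch (not part of the original source): MatLoad_MPIRowbs above
   splits the M global rows into near-equal blocks, gathers every process's local
   row count, and prefix-sums the counts into ownership offsets.  SketchRowOwnership
   is a hypothetical helper; rowners must have size+1 entries.
*/
#include <mpi.h>

static int SketchRowOwnership(int M,MPI_Comm comm,int *rowners,int *rstart,int *rend)
{
  int size,rank,m,i;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  m = M/size + ((M % size) > rank);          /* first (M % size) ranks get one extra row */
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];   /* prefix sum of the counts */
  *rstart = rowners[rank];
  *rend   = rowners[rank+1];
  return 0;
}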
for (i=0; idata; A = a->A; rs = A->rows; if (!rank) { /* calculate the number of nonzeros on each processor */ ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); for (i=0; inum_rows; i++) { for (j=0; jimax[i]; j++) { rs[i]->col[j] = cols[nz++]; } rs[i]->length = a->imax[i]; } /* read in parts for all other processors */ for (i=1; itag,comm);CHKERRQ(ierr); } ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); /* read in my part of the matrix numerical values */ nz = procsnz[0]; ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); /* insert it into my part of matrix */ nz = 0; for (i=0; inum_rows; i++) { for (j=0; jimax[i]; j++) { rs[i]->nz[j] = vals[nz++]; } } /* read in parts for all other processors */ for (i=1; itag,comm);CHKERRQ(ierr); } ierr = PetscFree(vals);CHKERRQ(ierr); ierr = PetscFree(procsnz);CHKERRQ(ierr); } else { /* determine buffer space needed for message */ nz = 0; for (i=0; inum_rows; i++) { nz += a->imax[i]; } ierr = PetscMalloc(nz*sizeof(int),&cols);CHKERRQ(ierr); /* receive message of column indices*/ ierr = MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong"); /* insert it into my part of matrix */ nz = 0; for (i=0; inum_rows; i++) { for (j=0; jimax[i]; j++) { rs[i]->col[j] = cols[nz++]; } rs[i]->length = a->imax[i]; } ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); /* receive message of values*/ ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong"); /* insert it into my part of matrix */ nz = 0; for (i=0; inum_rows; i++) { for (j=0; jimax[i]; j++) { rs[i]->nz[j] = vals[nz++]; } rs[i]->length = a->imax[i]; } ierr = PetscFree(vals);CHKERRQ(ierr); } ierr = PetscFree(rowners);CHKERRQ(ierr); a->nz = a->maxnz; ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Special destroy and view routines for factored matrices */ #undef __FUNCT__ #define __FUNCT__ "MatDestroy_MPIRowbs_Factored" static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat) { PetscFunctionBegin; #if defined(PETSC_USE_LOG) PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap.N,mat->cmap.N); #endif PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatView_MPIRowbs_Factored" static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer) { PetscErrorCode ierr; PetscFunctionBegin; ierr = MatView((Mat) mat->data,viewer);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatIncompleteCholeskyFactorSymbolic_MPIRowbs" PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact) { /* Note: f is not currently used in BlockSolve */ Mat newmat; Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; PetscTruth idn; PetscFunctionBegin; if (isrow) { ierr = ISIdentity(isrow,&idn);CHKERRQ(ierr); if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported"); } if (!mat->symmetric) { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\ preconditioning with a MATMPIROWBS matrix you must declare 
it to be \n\ symmetric using the option MatSetOption(A,MAT_SYMMETRIC)"); } /* If the icc_storage flag wasn't set before the last blocksolveassembly, */ /* we must completely redo the assembly as a different storage format is required. */ if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) { mat->same_nonzero = PETSC_FALSE; mbs->blocksolveassembly = 0; } if (!mbs->blocksolveassembly) { BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0); BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0); ierr = MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);CHKERRQ(ierr); } /* Copy permuted matrix */ if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);} mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0); /* Set up the communication for factorization */ if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);} mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0); /* Create a new Mat structure to hold the "factored" matrix, not this merely contains a pointer to the original matrix, since the original matrix contains the factor information. */ ierr = PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);CHKERRQ(ierr); ierr = PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));CHKERRQ(ierr); newmat->data = (void*)mat; ierr = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); newmat->ops->destroy = MatDestroy_MPIRowbs_Factored; newmat->ops->view = MatView_MPIRowbs_Factored; newmat->factor = 1; newmat->preallocated = PETSC_TRUE; ierr = PetscMapCopy(mat->comm,&mat->rmap,&newmat->rmap);CHKERRQ(ierr); ierr = PetscMapCopy(mat->comm,&mat->cmap,&newmat->cmap);CHKERRQ(ierr); ierr = PetscStrallocpy(MATMPIROWBS,&newmat->type_name);CHKERRQ(ierr); *newfact = newmat; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatILUFactorSymbolic_MPIRowbs" PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact) { Mat newmat; Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data; PetscErrorCode ierr; PetscTruth idn; PetscFunctionBegin; if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill"); if (isrow) { ierr = ISIdentity(isrow,&idn);CHKERRQ(ierr); if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported"); } if (iscol) { ierr = ISIdentity(iscol,&idn);CHKERRQ(ierr); if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported"); } if (!mbs->blocksolveassembly) { ierr = MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);CHKERRQ(ierr); } /* if (mat->symmetric) { */ /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */ /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */ /* using the option MatSetOption(A,MAT_SYMMETRIC)"); */ /* } */ /* Copy permuted matrix */ if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);} mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0); /* Set up the communication for factorization */ if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);} mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0); /* Create a new Mat structure to hold the "factored" matrix, not this merely contains a pointer to the original matrix, since the original matrix contains the factor information. 
*/ ierr = PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);CHKERRQ(ierr); ierr = PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));CHKERRQ(ierr); newmat->data = (void*)mat; ierr = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); newmat->ops->destroy = MatDestroy_MPIRowbs_Factored; newmat->ops->view = MatView_MPIRowbs_Factored; newmat->factor = 1; newmat->preallocated = PETSC_TRUE; ierr = PetscMapCopy(mat->comm,&mat->rmap,&newmat->rmap);CHKERRQ(ierr); ierr = PetscMapCopy(mat->comm,&mat->cmap,&newmat->cmap);CHKERRQ(ierr); ierr = PetscStrallocpy(MATMPIROWBS,&newmat->type_name);CHKERRQ(ierr); *newfact = newmat; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatCreateMPIRowbs" /*@C MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS format. This format is intended primarily as an interface for BlockSolve95. Collective on MPI_Comm Input Parameters: + comm - MPI communicator . m - number of local rows (or PETSC_DECIDE to have calculated) . M - number of global rows (or PETSC_DECIDE to have calculated) . nz - number of nonzeros per row (same for all local rows) - nnz - number of nonzeros per row (possibly different for each row). Output Parameter: . newA - the matrix Notes: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor than it must be used on all processors that share the object for that argument. The user MUST specify either the local or global matrix dimensions (possibly both). Specify the preallocated storage with either nz or nnz (not both). Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory allocation. Notes: By default, the matrix is assumed to be nonsymmetric; the user can take advantage of special optimizations for symmetric matrices by calling $ MatSetOption(mat,MAT_SYMMETRIC) $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL) BEFORE calling the routine MatAssemblyBegin(). Internally, the MATMPIROWBS format inserts zero elements to the matrix if necessary, so that nonsymmetric matrices are considered to be symmetric in terms of their sparsity structure; this format is required for use of the parallel communication routines within BlockSolve95. In particular, if the matrix element A[i,j] exists, then PETSc will internally allocate a 0 value for the element A[j,i] during MatAssemblyEnd() if the user has not already set a value for the matrix element A[j,i]. Options Database Keys: . -mat_rowbs_no_inode - Do not use inodes. 
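   Example of use (a minimal sketch; a global dimension M and the default preallocation
   of 5 nonzeros per row are assumed, and error checking is abbreviated):
$     Mat A;
$     ierr = MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,M,5,PETSC_NULL,&A);CHKERRQ(ierr);
$     /* set entries with MatSetValues(), then assemble */
$     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
$     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);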
Level: intermediate .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve .seealso: MatCreate(), MatSetValues() @*/ PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA) { PetscErrorCode ierr; PetscFunctionBegin; ierr = MatCreate(comm,newA);CHKERRQ(ierr); ierr = MatSetSizes(*newA,m,m,M,M);CHKERRQ(ierr); ierr = MatSetType(*newA,MATMPIROWBS);CHKERRQ(ierr); ierr = MatMPIRowbsSetPreallocation(*newA,nz,nnz);CHKERRQ(ierr); PetscFunctionReturn(0); } /* -------------------------------------------------------------------------*/ #include "src/mat/impls/aij/seq/aij.h" #include "src/mat/impls/aij/mpi/mpiaij.h" #undef __FUNCT__ #define __FUNCT__ "MatGetSubMatrices_MPIRowbs" PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) { PetscErrorCode ierr; int nmax,nstages_local,nstages,i,pos,max_no; PetscFunctionBegin; /* Allocate memory to hold all the submatrices */ if (scall != MAT_REUSE_MATRIX) { ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr); } /* Determine the number of stages through which submatrices are done */ nmax = 20*1000000 / (C->cmap.N * sizeof(int)); if (!nmax) nmax = 1; nstages_local = ismax/nmax + ((ismax % nmax)?1:0); /* Make sure every processor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr); for (i=0,pos=0; idata); BSspmat *A = c->A; Mat_SeqAIJ *mat; PetscErrorCode ierr; int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size; int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc; int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr; int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap; int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i; int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i; int *rmap_i,tag0,tag1,tag2,tag3; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i; PetscScalar *mat_a; PetscTruth sorted; int *onodes1,*olengths1; PetscFunctionBegin; comm = C->comm; tag0 = C->tag; size = c->size; rank = c->rank; m = C->rmap.N; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i proc*/ for (i=0,j=0; irmap.range[i+1]; for (; jrows; int id,rstart = C->rmap.rstart; int *sbuf2_i; for (i=0; ilength; sbuf2_i[j] = ncols; req_size[idx] += ncols; } req_source[idx] = r_status1[i].MPI_SOURCE; /* form the header */ sbuf2_i[0] = req_size[idx]; for (j=1; jj, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; irmap.rstart; for (i=0; irows[rbuf1_i[ct1] - rstart]; ncols = brow->length; Acol = brow->col; /* load the column indices for this row into cols*/ cols = sbuf_aj_i + ct2; ierr = PetscMemcpy(cols,Acol,ncols*sizeof(int));CHKERRQ(ierr); /*for (l=0; la, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; irmap.rstart; for (i=0; irows[rbuf1_i[ct1] - rstart]; ncols = brow->length; Aval = brow->nz; /* load the column values for this row into vals*/ vals = 
sbuf_aa_i+ct2; ierr = PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));CHKERRQ(ierr); ct2 += ncols; } } ierr = MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); } } ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); /* Form the matrix */ /* create col map */ { int *icol_i; len = (1+ismax)*sizeof(int*)+ ismax*C->cmap.N*sizeof(int); ierr = PetscMalloc(len,&cmap);CHKERRQ(ierr); cmap[0] = (int*)(cmap + ismax); ierr = PetscMemzero(cmap[0],(1+ismax*C->cmap.N)*sizeof(int));CHKERRQ(ierr); for (i=1; icmap.N; } for (i=0; irows[row-C->rmap.rstart]; ncols=Arow->length; cols=Arow->col; for (k=0; krmap.N*sizeof(int); ierr = PetscMalloc(len,&rmap);CHKERRQ(ierr); rmap[0] = (int*)(rmap + ismax); ierr = PetscMemzero(rmap[0],ismax*C->rmap.N*sizeof(int));CHKERRQ(ierr); for (i=1; irmap.N;} for (i=0; idata); if ((submats[i]->rmap.n != nrow[i]) || (submats[i]->cmap.n != ncol[i])) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); } ierr = PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap.n*sizeof(int),&same);CHKERRQ(ierr); if (!same) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); } /* Initial matrix as if empty */ ierr = PetscMemzero(mat->ilen,submats[i]->rmap.n*sizeof(int));CHKERRQ(ierr); submats[i]->factor = C->factor; } } else { for (i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jrows[old_row-C->rmap.rstart]; ncols=Arow->length; cols=Arow->col; vals=Arow->nz; mat_i = imat_i[row]; mat_a = imat_a + mat_i; mat_j = imat_j + mat_i; for (k=0; kdata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; max1 = sbuf1_i[2*j]; for (k=0; kdata; BSspmat *A = c->A; BSsprow *Arow; Mat_SeqAIJ *matA,*matB; /* on prac , off proc part of submat */ Mat_MPIAIJ *mat; /* submat->data */ PetscErrorCode ierr; int *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3; int *w1,*w2,*pa,nrqs,nrqr,msz,row_t; int i,j,k,l,len,jmax,proc,idx; int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3; FLOAT **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */ int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend; int *req_source; int ncols_t; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; PetscFunctionBegin; comm = C->comm; tag0 = C->tag; size = c->size; rank = c->rank; if (size==1) { if (scall == MAT_REUSE_MATRIX) { ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);CHKERRQ(ierr); PetscFunctionReturn(0); } else { Mat *newsubmat; ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);CHKERRQ(ierr); *submat=*newsubmat; ierr=PetscFree(newsubmat);CHKERRQ(ierr); PetscFunctionReturn(0); } } /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* Check if the col indices are sorted */ {PetscTruth sorted; ierr = ISSorted(isrow,&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted"); ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); if (!sorted) 
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); } ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow"); if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol"); len = (C->rmap.N+1)*sizeof(int); ierr = PetscMalloc(len,&rtable);CHKERRQ(ierr); /* Create hash table for the mapping :row -> proc*/ for (i=0,j=0; irmap.range[i+1]; for (; jrows; int id,rstart = C->rmap.rstart; int *sbuf2_i,*rbuf1_i,end; for (i=0; ilength; sbuf2_i[j] = ncols_t; req_size[idx] += ncols_t; } req_source[idx] = r_status1[i].MPI_SOURCE; /* form the header */ sbuf2_i[0] = req_size[idx]; ierr = MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);CHKERRQ(ierr); } } ierr = PetscFree(r_status1);CHKERRQ(ierr); ierr = PetscFree(r_waits1);CHKERRQ(ierr); /* recv buffer sizes */ /* Receive messages*/ ierr = PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); for (i=0; ij, and send them off */ /* structure of sbuf3[i]/rbuf3[i],sbuf4[i]/rbuf4[i]: reqsize[i] (cols resp. * vals of all req. rows; row sizes was in rbuf2; vals are of FLOAT type */ ierr = PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);CHKERRQ(ierr); for (i=0,j=0; irmap.rstart; for (i=0; irows[rbuf1_i[rqrow] - rstart]; ncols = Arow->length; Acol = Arow->col; /* load the column indices for this row into cols*/ cols = sbuf3_i + noutcols; ierr = PetscMemcpy(cols,Acol,ncols*sizeof(int));CHKERRQ(ierr); /*for (l=0; la, and send them off */ /* can be optimized by conect with previous block */ ierr = PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);CHKERRQ(ierr); for (i=0,j=0; irmap.rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols; for (i=0; irows[rbuf1_i[rqrow] - rstart]; ncols = Arow->length; Aval = Arow->nz; /* load the column values for this row into vals*/ vals = sbuf4_i+noutvals; ierr = PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));CHKERRQ(ierr); noutvals += ncols; } ierr = MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); } } ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); /* Form the matrix */ /* create col map */ len = C->cmap.N*sizeof(int)+1; ierr = PetscMalloc(len,&cmap);CHKERRQ(ierr); ierr = PetscMemzero(cmap,C->cmap.N*sizeof(int));CHKERRQ(ierr); for (j=0; jrmap.N*sizeof(int)+1; ierr = PetscMalloc(len,&rmap);CHKERRQ(ierr); ierr = PetscMemzero(rmap,C->rmap.N*sizeof(int));CHKERRQ(ierr); for (j=0; j rank); } else { nlocal = csize; } { int ncols,*cols,olen,dlen,thecol; int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx; ierr = MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); cstart = cend - nlocal; if (rank == size - 1 && cend != ncol) { SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns"); } ierr = PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);CHKERRQ(ierr); o_nz = d_nz + nrow; /* Update lens from local data */ for (j=0; jrows[row-C->rmap.rstart]; ncols=Arow->length; cols=Arow->col; olen=dlen=0; for (k=0; ktype_name);CHKERRQ(ierr); ierr = 
MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);CHKERRQ(ierr); mat=(Mat_MPIAIJ *)((*submat)->data); matA=(Mat_SeqAIJ *)(mat->A->data); matB=(Mat_SeqAIJ *)(mat->B->data); } else { PetscTruth same; /* folowing code can be optionaly dropped for debuged versions of users * program, but I don't know PETSc option which can switch off such safety * tests - in a same way counting of o_nz,d_nz can be droped for REUSE * matrix */ PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same); if (!same) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type"); } if (((*submat)->rmap.n != nrow) || ((*submat)->cmap.N != ncol)) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); } mat=(Mat_MPIAIJ *)((*submat)->data); matA=(Mat_SeqAIJ *)(mat->A->data); matB=(Mat_SeqAIJ *)(mat->B->data); ierr = PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);CHKERRQ(ierr); if (!same) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); } ierr = PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);CHKERRQ(ierr); if (!same) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); } /* Initial matrix as if empty */ ierr = PetscMemzero(matA->ilen,nrow*sizeof(int));CHKERRQ(ierr); ierr = PetscMemzero(matB->ilen,nrow*sizeof(int));CHKERRQ(ierr); /* Perhaps MatZeroEnteries may be better - look what it is exactly doing - I must * delete all possibly nonactual inforamtion */ /*submats[i]->factor = C->factor; !!! ??? if factor will be same then I must * copy some factor information - where are thay */ (*submat)->was_assembled=PETSC_FALSE; (*submat)->assembled=PETSC_FALSE; } ierr = PetscFree(d_nz);CHKERRQ(ierr); /* Assemble the matrix */ /* First assemble from local rows */ { int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol; FLOAT *vals; PetscScalar *matA_a,*matB_a; for (j=0; jrows[oldrow-C->rmap.rstart]; ncols = Arow->length; cols = Arow->col; vals = Arow->nz; i_row = matA->i[row]; matA_a = matA->a + i_row; matA_j = matA->j + i_row; i_row = matB->i[row]; matB_a = matB->a + i_row; matB_j = matB->j + i_row; for (k=0,ilenA=0,ilenB=0; kilen[row]=ilenA; matB->ilen[row]=ilenB; } } } /* Now assemble the off proc rows*/ { int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row; int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB; PetscScalar *matA_a,*matB_a; FLOAT *rbuf4_i; for (j=0; ji[row]; matA_a = matA->a + i_row; matA_j = matA->j + i_row; i_row = matB->i[row]; matB_a = matB->a + i_row; matB_j = matB->j + i_row; lmax = rbuf2_i[k]; for (l=0,ilenA=0,ilenB=0; lilen[row]=ilenA; matB->ilen[row]=ilenB; } } } ierr = PetscFree(r_status4);CHKERRQ(ierr); ierr = PetscFree(r_waits4);CHKERRQ(ierr); if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} ierr = PetscFree(s_waits4);CHKERRQ(ierr); ierr = PetscFree(s_status4);CHKERRQ(ierr); /* Restore the indices */ ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); /* Destroy allocated memory */ ierr = PetscFree(rtable);CHKERRQ(ierr); ierr = PetscFree(w1);CHKERRQ(ierr); ierr = PetscFree(pa);CHKERRQ(ierr); ierr = PetscFree(sbuf1);CHKERRQ(ierr); ierr = PetscFree(rbuf2[0]);CHKERRQ(ierr); ierr = PetscFree(rbuf2);CHKERRQ(ierr); for (i=0; i
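/*
   The request phase above repeatedly has to decide which process owns a given
   global row (the "hash table for the mapping row -> proc" built from
   C->rmap.range).  A minimal standalone sketch of that table, assuming a
   PETSc-style ownership array range[0..size] where range[p] is the first
   global row of process p and range[size] is the global row count;
   build_row_table() is an illustrative name, not part of the original file.
*/
#include <stdlib.h>

/* rtable[r] = rank of the process owning global row r.  Built once, then
   used for O(1) lookups while grouping row requests by destination, e.g.
   proc = rtable[irow_i[j]].  Caller frees the result with free(). */
static int *build_row_table(int size,const int range[])
{
  int N = range[size];                 /* total number of global rows */
  int *rtable = (int*)malloc((N > 0 ? N : 1)*sizeof(int));
  int p,r;

  if (!rtable) return NULL;
  for (p=0,r=0; p<size; p++) {
    for (; r<range[p+1]; r++) rtable[r] = p;
  }
  return rtable;
}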
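/*
   To renumber columns, the routines above build a dense map from global
   column index to "local column + 1", with 0 meaning the column was not
   requested, so every incoming row can be filtered and compressed in a
   single pass.  A sketch of the same idea with plain C types;
   build_col_map()/filter_row() and the double-precision values are
   illustrative assumptions (the original buffers use the BlockSolve FLOAT
   type).
*/
#include <stdlib.h>

/* cmap[gcol] = local column index + 1 if gcol is one of the ncol selected
   global columns (icol[]), 0 otherwise.  N is the global column count. */
static int *build_col_map(int N,int ncol,const int icol[])
{
  int *cmap = (int*)calloc((size_t)(N > 0 ? N : 1),sizeof(int));
  int j;

  if (!cmap) return NULL;
  for (j=0; j<ncol; j++) cmap[icol[j]] = j + 1;
  return cmap;
}

/* Copy one row (global column indices cols[], values vals[]) into submatrix
   row buffers, keeping only selected columns and renumbering them into the
   submatrix's column space.  Returns the number of entries kept. */
static int filter_row(const int cmap[],int ncols,const int cols[],const double vals[],
                      int subcols[],double subvals[])
{
  int k,tcol,cnt = 0;

  for (k=0; k<ncols; k++) {
    tcol = cmap[cols[k]];
    if (tcol) {                  /* column is part of the submatrix */
      subcols[cnt] = tcol - 1;   /* undo the +1 shift */
      subvals[cnt] = vals[k];
      cnt++;
    }
  }
  return cnt;
}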
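/*
   MatGetSubMatrix_MPIRowbs above derives the new matrix's column layout with
   MPI_Scan and then counts, for every local row, how many selected columns
   fall inside the owned range [cstart,cend) (diagonal block, d_nz) versus
   outside it (off-diagonal block, o_nz) before calling
   MatMPIAIJSetPreallocation.  A sketch of that counting under the cmap
   convention of the previous sketch; the function names and the
   row_ncols/row_cols arguments are illustrative simplifications.
*/
#include <mpi.h>

/* Given the number of submatrix columns this process will own (nlocal),
   compute the owned column range [*cstart,*cend) with an inclusive prefix
   sum, as the code above does. */
static void local_col_range(MPI_Comm comm,int nlocal,int *cstart,int *cend)
{
  int end = 0;

  MPI_Scan(&nlocal,&end,1,MPI_INT,MPI_SUM,comm);
  *cend   = end;
  *cstart = end - nlocal;
}

/* For each of the nrow local rows, count selected columns landing in the
   diagonal block (d_nz[]) and in the off-diagonal block (o_nz[]).
   row_ncols[j] and row_cols[j] describe row j in global column numbering;
   cmap[] maps a global column to local-column+1, 0 if not selected. */
static void count_diag_offdiag(int nrow,const int row_ncols[],int **row_cols,
                               const int cmap[],int cstart,int cend,
                               int d_nz[],int o_nz[])
{
  int j,k,tcol;

  for (j=0; j<nrow; j++) {
    d_nz[j] = 0;
    o_nz[j] = 0;
    for (k=0; k<row_ncols[j]; k++) {
      tcol = cmap[row_cols[j][k]];
      if (!tcol) continue;                           /* column not selected */
      if (tcol-1 >= cstart && tcol-1 < cend) d_nz[j]++;
      else                                   o_nz[j]++;
    }
  }
}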
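/*
   The final assembly loops above place each kept entry either in the
   diagonal SeqAIJ block (columns inside [cstart,cend), shifted by cstart) or
   in the off-diagonal block, advancing the per-row counters that end up in
   matA->ilen[] and matB->ilen[].  A compact sketch of that split for a
   single already-filtered row, using plain CSR-style arrays; split_row() and
   the double values are illustrative, and the off-diagonal columns are left
   in submatrix numbering, as the code above appears to do before the final
   MatAssembly calls.
*/

/* Split one filtered row between the diagonal block (Aj/Aa, local column =
   submatrix column - cstart) and the off-diagonal block (Bj/Ba, submatrix
   numbering).  The counts written are returned through ilenA/ilenB. */
static void split_row(int ncols,const int subcols[],const double subvals[],
                      int cstart,int cend,
                      int Aj[],double Aa[],int Bj[],double Ba[],
                      int *ilenA,int *ilenB)
{
  int k,na = 0,nb = 0;

  for (k=0; k<ncols; k++) {
    if (subcols[k] >= cstart && subcols[k] < cend) {
      Aj[na]   = subcols[k] - cstart;  /* local column within diagonal block */
      Aa[na++] = subvals[k];
    } else {
      Bj[nb]   = subcols[k];           /* kept in submatrix-global numbering */
      Ba[nb++] = subvals[k];
    }
  }
  *ilenA = na;
  *ilenB = nb;
}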