Actual source code: nn.c
  1: #define PETSCKSP_DLL

 3:  #include src/ksp/pc/impls/is/nn/nn.h

  5: /* -------------------------------------------------------------------------- */
  6: /*
  7:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  8:                     by setting data structures and options.   

 10:    Input Parameter:
 11: .  pc - the preconditioner context

 13:    Application Interface Routine: PCSetUp()

 15:    Notes:
 16:    The interface routine PCSetUp() is not usually called directly by
 17:    the user, but instead is called by PCApply() if necessary.
 18: */
 21: static PetscErrorCode PCSetUp_NN(PC pc)
 22: {
 24: 
 26:   if (!pc->setupcalled) {
 27:     /* Set up all the "iterative substructuring" common block */
 28:     PCISSetUp(pc);
 29:     /* Create the coarse matrix. */
 30:     PCNNCreateCoarseMatrix(pc);
 31:   }
 32:   return(0);
 33: }

 35: /* -------------------------------------------------------------------------- */
 36: /*
 37:    PCApply_NN - Applies the NN preconditioner to a vector.

 39:    Input Parameters:
 40: .  pc - the preconditioner context
 41: .  r - input vector (global)

 43:    Output Parameter:
 44: .  z - output vector (global)

 46:    Application Interface Routine: PCApply()
 47:  */
 50: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
 51: {
 52:   PC_IS          *pcis = (PC_IS*)(pc->data);
 54:   PetscScalar    m_one = -1.0;
 55:   Vec            w = pcis->vec1_global;

 58:   /*
 59:     Dirichlet solvers.
 60:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 61:     Storing the local results at vec2_D
 62:   */
 63:   VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 64:   VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 65:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 66: 
 67:   /*
 68:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 69:     Storing the result in the interface portion of the global vector w.
 70:   */
 71:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 72:   VecScale(pcis->vec1_B,m_one);
 73:   VecCopy(r,w);
 74:   VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
 75:   VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

 77:   /*
 78:     Apply the interface preconditioner
 79:   */
 80:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 81:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 83:   /*
 84:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 85:     The result is stored in vec1_D.
 86:   */
 87:   VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 88:   VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 89:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 91:   /*
 92:     Dirichlet solvers.
 93:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 94:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 95:   */
 96:   VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 97:   VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 98:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 99:   VecScale(pcis->vec2_D,m_one);
100:   VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
101:   VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
102:   return(0);
103: }

105: /* -------------------------------------------------------------------------- */
106: /*
107:    PCDestroy_NN - Destroys the private context for the NN preconditioner
108:    that was created with PCCreate_NN().

110:    Input Parameter:
111: .  pc - the preconditioner context

113:    Application Interface Routine: PCDestroy()
114: */
117: static PetscErrorCode PCDestroy_NN(PC pc)
118: {
119:   PC_NN          *pcnn = (PC_NN*)pc->data;

123:   PCISDestroy(pc);

125:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
126:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
127:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
128:   if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
129:   if (pcnn->DZ_IN) {
130:     PetscFree(pcnn->DZ_IN[0]);
131:     PetscFree(pcnn->DZ_IN);
132:   }

134:   /*
135:       Free the private data structure that was hanging off the PC
136:   */
137:   PetscFree(pcnn);
138:   return(0);
139: }

141: /* -------------------------------------------------------------------------- */
142: /*MC
143:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

145:    Options Database Keys:
146: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147:                                        (this skips the first coarse grid solve in the preconditioner)
148: .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149:                                        (this skips the second coarse grid solve in the preconditioner)
150: .    -pc_is_damp_fixed <fact> -
151: .    -pc_is_remove_nullspace_fixed -
152: .    -pc_is_set_damping_factor_floating <fact> -
153: .    -pc_is_not_damp_floating -
154: +    -pc_is_not_remove_nullspace_floating - 

156:    Level: intermediate

158:    Notes: The matrix used with this preconditioner must be of type MATIS 

160:           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162:           on the subdomains; though in our experience using approximate solvers is slower.).

164:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

168:    Contributed by Paulo Goldfeld

170: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171: M*/
175: PetscErrorCode  PCCreate_NN(PC pc)
176: {
178:   PC_NN          *pcnn;

181:   /*
182:      Creates the private data structure for this preconditioner and
183:      attach it to the PC object.
184:   */
185:   PetscNew(PC_NN,&pcnn);
186:   pc->data  = (void*)pcnn;

188:   /*
189:      Logs the memory usage; this is not needed but allows PETSc to 
190:      monitor how much memory is being used for various purposes.
191:   */
192:   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

194:   PCISCreate(pc);
195:   pcnn->coarse_mat  = 0;
196:   pcnn->coarse_x    = 0;
197:   pcnn->coarse_b    = 0;
198:   pcnn->ksp_coarse = 0;
199:   pcnn->DZ_IN       = 0;

201:   /*
202:       Set the pointers for the functions that are provided above.
203:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204:       are called, they will automatically call these functions.  Note we
205:       choose not to provide a couple of these functions since they are
206:       not needed.
207:   */
208:   pc->ops->apply               = PCApply_NN;
209:   pc->ops->applytranspose      = 0;
210:   pc->ops->setup               = PCSetUp_NN;
211:   pc->ops->destroy             = PCDestroy_NN;
212:   pc->ops->view                = 0;
213:   pc->ops->applyrichardson     = 0;
214:   pc->ops->applysymmetricleft  = 0;
215:   pc->ops->applysymmetricright = 0;
216:   return(0);
217: }


221: /* -------------------------------------------------------------------------- */
222: /*
223:    PCNNCreateCoarseMatrix - 
224: */
227: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
228: {
229:   MPI_Request    *send_request, *recv_request;
231:   PetscInt       i, j, k;
232:   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
233:   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

235:   /* aliasing some names */
236:   PC_IS*         pcis     = (PC_IS*)(pc->data);
237:   PC_NN*         pcnn     = (PC_NN*)pc->data;
238:   PetscInt       n_neigh  = pcis->n_neigh;
239:   PetscInt*      neigh    = pcis->neigh;
240:   PetscInt*      n_shared = pcis->n_shared;
241:   PetscInt**     shared   = pcis->shared;
242:   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */

245:   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
246:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

248:   /* Allocate memory for DZ */
249:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
250:   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
251:   {
252:     PetscInt size_of_Z = 0;
253:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
254:     DZ_IN = pcnn->DZ_IN;
255:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
256:     for (i=0; i<n_neigh; i++) {
257:       size_of_Z += n_shared[i];
258:     }
259:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
260:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
261:   }
262:   for (i=1; i<n_neigh; i++) {
263:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
264:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
265:   }

267:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
268:   /* First, set the auxiliary array pcis->work_N. */
269:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
270:   for (i=1; i<n_neigh; i++){
271:     for (j=0; j<n_shared[i]; j++) {
272:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
273:     }
274:   }

276:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
277:   /* Notice that send_request[] and recv_request[] could have one less element. */
278:   /* We make them longer to have request[i] corresponding to neigh[i].          */
279:   {
280:     PetscMPIInt tag;
281:     PetscObjectGetNewTag((PetscObject)pc,&tag);
282:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
283:     recv_request = send_request + (n_neigh);
284:     for (i=1; i<n_neigh; i++) {
285:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
286:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
287:     }
288:   }

290:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
291:   for(j=0; j<n_shared[0]; j++) {
292:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
293:   }

295:   /* Start computing with local D*Z while communication goes on.    */
296:   /* Apply Schur complement. The result is "stored" in vec (more    */
297:   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
298:   /* and also scattered to pcnn->work_N.                            */
299:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
300:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

302:   /* Compute the first column, while completing the receiving. */
303:   for (i=0; i<n_neigh; i++) {
304:     MPI_Status  stat;
305:     PetscMPIInt ind=0;
306:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
307:     mat[ind*n_neigh+0] = 0.0;
308:     for (k=0; k<n_shared[ind]; k++) {
309:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
310:     }
311:   }

313:   /* Compute the remaining of the columns */
314:   for (j=1; j<n_neigh; j++) {
315:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
316:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
317:     for (i=0; i<n_neigh; i++) {
318:       mat[i*n_neigh+j] = 0.0;
319:       for (k=0; k<n_shared[i]; k++) {
320:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
321:       }
322:     }
323:   }

325:   /* Complete the sending. */
326:   if (n_neigh>1) {
327:     MPI_Status *stat;
328:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
329:     if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
330:     PetscFree(stat);
331:   }

333:   /* Free the memory for the MPI requests */
334:   PetscFree(send_request);

336:   /* Free the memory for DZ_OUT */
337:   if (DZ_OUT) {
338:     PetscFree(DZ_OUT[0]);
339:     PetscFree(DZ_OUT);
340:   }

342:   {
343:     PetscMPIInt size;
344:     MPI_Comm_size(pc->comm,&size);
345:     /* Create the global coarse vectors (rhs and solution). */
346:     VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
347:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
348:     /* Create and set the global coarse AIJ matrix. */
349:     MatCreate(pc->comm,&(pcnn->coarse_mat));
350:     MatSetSizes(pcnn->coarse_mat,1,1,size,size);
351:     MatSetType(pcnn->coarse_mat,MATAIJ);
352:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
353:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
354:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
355:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
356:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
357:   }

359:   {
360:     PetscMPIInt rank;
361:     PetscScalar one = 1.0;
362:     MPI_Comm_rank(pc->comm,&rank);
363:     /* "Zero out" rows of not-purely-Neumann subdomains */
364:     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
365:       MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);
366:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
367:       PetscInt row = (PetscInt) rank;
368:       MatZeroRows(pcnn->coarse_mat,1,&row,one);
369:     }
370:   }

372:   /* Create the coarse linear solver context */
373:   {
374:     PC  pc_ctx, inner_pc;
375:     KSPCreate(pc->comm,&pcnn->ksp_coarse);
376:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
377:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
378:     PCSetType(pc_ctx,PCREDUNDANT);
379:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
380:     PCRedundantGetPC(pc_ctx,&inner_pc);
381:     PCSetType(inner_pc,PCLU);
382:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
383:     KSPSetFromOptions(pcnn->ksp_coarse);
384:     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
385:     KSPSetUp(pcnn->ksp_coarse);
386:   }

388:   /* Free the memory for mat */
389:   PetscFree(mat);

391:   /* for DEBUGGING, save the coarse matrix to a file. */
392:   {
393:     PetscTruth flg;
394:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
395:     if (flg) {
396:       PetscViewer viewer;
397:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
398:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
399:       MatView(pcnn->coarse_mat,viewer);
400:       PetscViewerDestroy(viewer);
401:     }
402:   }

404:   /*  Set the variable pcnn->factor_coarse_rhs. */
405:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

407:   /* See historical note 02, at the bottom of this file. */
408:   return(0);
409: }

411: /* -------------------------------------------------------------------------- */
412: /*
413:    PCNNApplySchurToChunk - 

415:    Input parameters:
416: .  pcnn
417: .  n - size of chunk
418: .  idx - indices of chunk
419: .  chunk - values

421:    Output parameters:
422: .  array_N - result of Schur complement applied to chunk, scattered to big array
423: .  vec1_B  - result of Schur complement applied to chunk
424: .  vec2_B  - garbage (used as work space)
425: .  vec1_D  - garbage (used as work space)
426: .  vec2_D  - garbage (used as work space)

428: */
431: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
432: {
434:   PetscInt       i;
435:   PC_IS          *pcis = (PC_IS*)(pc->data);

438:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
439:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
440:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
441:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
442:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
443:   return(0);
444: }

446: /* -------------------------------------------------------------------------- */
447: /*
448:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 
449:                                       the preconditioner for the Schur complement.

451:    Input parameter:
452: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

454:    Output parameters:
455: .  z - global vector of interior and interface nodes. The values on the interface are the result of
456:        the application of the interface preconditioner to the interface part of r. The values on the
457:        interior nodes are garbage.
458: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
459: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
460: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
461: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
463: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
464: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
465: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

467: */
470: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
471:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
472: {
474:   PC_IS*         pcis = (PC_IS*)(pc->data);

477:   /*
478:     First balancing step.
479:   */
480:   {
481:     PetscTruth flg;
482:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
483:     if (!flg) {
484:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
485:     } else {
486:       VecCopy(r,z);
487:     }
488:   }

490:   /*
491:     Extract the local interface part of z and scale it by D 
492:   */
493:   VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
494:   VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
495:   VecPointwiseMult(vec2_B,pcis->D,vec1_B);

497:   /* Neumann Solver */
498:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

500:   /*
501:     Second balancing step.
502:   */
503:   {
504:     PetscTruth flg;
505:     PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);
506:     if (!flg) {
507:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
508:     } else {
509:       VecPointwiseMult(vec2_B,pcis->D,vec1_B);
510:       VecSet(z,0.0);
511:       VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
512:       VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
513:     }
514:   }
515:   return(0);
516: }

518: /* -------------------------------------------------------------------------- */
519: /*
520:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
521:                    input argument u is provided), or s, as given in equations
522:                    (12) and (13), if the input argument u is a null vector.
523:                    Notice that the input argument u plays the role of u_i in
524:                    equation (14). The equation numbers refer to [Man93].

526:    Input Parameters:
527: .  pcnn - NN preconditioner context.
528: .  r - MPI vector of all nodes (interior and interface). It's preserved.
529: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

531:    Output Parameters:
532: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
533: .  vec1_B - Sequential vector of local interface nodes. Workspace.
534: .  vec2_B - Sequential vector of local interface nodes. Workspace.
535: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
536: .  vec1_D - Sequential vector of local interior nodes. Workspace.
537: .  vec2_D - Sequential vector of local interior nodes. Workspace.
538: .  work_N - Array of all local nodes (interior and interface). Workspace.

540: */
543: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
544:                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
545: {
547:   PetscInt       k;
548:   PetscScalar    value;
549:   PetscScalar*   lambda;
550:   PC_NN*         pcnn     = (PC_NN*)(pc->data);
551:   PC_IS*         pcis     = (PC_IS*)(pc->data);

555:   if (u) {
556:     if (!vec3_B) { vec3_B = u; }
557:     VecPointwiseMult(vec1_B,pcis->D,u);
558:     VecSet(z,0.0);
559:     VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
560:     VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
561:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
562:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
563:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
564:     VecScale(vec3_B,-1.0);
565:     VecCopy(r,z);
566:     VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
567:     VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
568:   } else {
569:     VecCopy(r,z);
570:   }
571:   VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
572:   VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
573:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
574:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
575:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
576:   {
577:     PetscMPIInt rank;
578:     MPI_Comm_rank(pc->comm,&rank);
579:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
580:     /*
581:        Since we are only inserting local values (one value actually) we don't need to do the 
582:        reduction that tells us there is no data that needs to be moved. Hence we comment out these
583:        VecAssemblyBegin(pcnn->coarse_b); 
584:        VecAssemblyEnd  (pcnn->coarse_b);
585:     */
586:   }
587:   KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
588:   if (!u) { VecScale(pcnn->coarse_x,-1.0); }
589:   VecGetArray(pcnn->coarse_x,&lambda);
590:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
591:   VecRestoreArray(pcnn->coarse_x,&lambda);
592:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
593:   VecSet(z,0.0);
594:   VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
595:   VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
596:   if (!u) {
597:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
598:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
599:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
600:     VecCopy(r,z);
601:   }
602:   VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
603:   VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
605:   return(0);
606: }




612: /*  -------   E N D   O F   T H E   C O D E   -------  */
613: /*                                                     */
614: /*  From now on, "footnotes" (or "historical notes").  */
615: /*                                                     */
616: /*  -------------------------------------------------  */



620: /* --------------------------------------------------------------------------
621:    Historical note 01 
622:    -------------------------------------------------------------------------- */
623: /*
624:    We considered the possibility of an alternative D_i that would still
625:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
626:    The basic principle was still the pseudo-inverse of the counting
627:    function; the difference was that we would not count subdomains
628:    that do not contribute to the coarse space (i.e., not pure-Neumann
629:    subdomains).

631:    This turned out to be a bad idea:  we would solve trivial Neumann
632:    problems in the not pure-Neumann subdomains, since we would be scaling
633:    the balanced residual by zero.
634: */




639: /* --------------------------------------------------------------------------
640:    Historical note 02 
641:    -------------------------------------------------------------------------- */
642: /*
643:    We tried an alternative coarse problem, that would eliminate exactly a
644:    constant error. Turned out not to improve the overall convergence.
645: */