Actual source code: ex30.c

  1: static char help[] = "Grid-based Landau collision operator with a PIC interface and OpenMP setup (one species per grid).\n";

  3: /*
  4:    Supports 2.5V with axisymmetric coordinates
  5:      - (r,z) coordinates
  6:      - Domain and species data are input by the Landau operator
  7:      - "radius" for each grid, normalized with the electron thermal velocity
  8:      - Domain: (0,radius) x (-radius,radius), so the first coordinate x[0] is the perpendicular velocity and a 2*pi*x[0] factor is added for the axisymmetric volume element
  9:    Supports full 3V

 11:  */

 13: #include <petscdmplex.h>
 14: #include <petscds.h>
 15: #include <petscdmswarm.h>
 16: #include <petscksp.h>
 17: #include <petsc/private/petscimpl.h>
 18: #if defined(PETSC_HAVE_OPENMP) && defined(PETSC_HAVE_THREADSAFETY)
 19:   #include <omp.h>
 20: #endif
 21: #include <petsclandau.h>
 22: #include <petscdmcomposite.h>
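
     // MatShellCtx: context for the MATSHELL used in gridToParticles(); it carries the
     // particle mass matrix M_p and its explicit transpose so the shell can apply the
     // normal-equations operator M_p^T M_p without assembling it.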

 24: typedef struct {
 25:   Mat MpTrans;
 26:   Mat Mp;
 27:   Vec ff;
 28:   Vec uu;
 29: } MatShellCtx;
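
     // PrintCtx: application context attached to the TS with TSSetApplicationContext()
     // and read back in PostStep(); it carries the swarms, grid DMs, mass matrices, and
     // the target vertex/grid used for the per-step diagnostics.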

 31: typedef struct {
 32:   PetscInt   v_target;
 33:   PetscInt   g_target;
 34:   PetscInt   global_vertex_id_0;
 35:   DM        *globSwarmArray;
 36:   LandauCtx *ctx;
 37:   DM        *grid_dm;
 38:   Mat       *g_Mass;
 39:   Mat       *globMpArray;
 40:   Vec       *globXArray;
 41:   PetscBool  print;
 42:   PetscBool  print_entropy;
 43: } PrintCtx;

 45: PetscErrorCode MatMultMtM_SeqAIJ(Mat MtM, Vec xx, Vec yy)
 46: {
 47:   MatShellCtx *matshellctx;

 49:   PetscFunctionBeginUser;
 50:   PetscCall(MatShellGetContext(MtM, &matshellctx));
 51:   PetscCheck(matshellctx, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "No context");
 52:   PetscCall(MatMult(matshellctx->Mp, xx, matshellctx->ff));
 53:   PetscCall(MatMult(matshellctx->MpTrans, matshellctx->ff, yy));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

 57: PetscErrorCode MatMultAddMtM_SeqAIJ(Mat MtM, Vec xx, Vec yy, Vec zz)
 58: {
 59:   MatShellCtx *matshellctx;

 61:   PetscFunctionBeginUser;
 62:   PetscCall(MatShellGetContext(MtM, &matshellctx));
 63:   PetscCheck(matshellctx, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "No context");
 64:   PetscCall(MatMult(matshellctx->Mp, xx, matshellctx->ff));
 65:   PetscCall(MatMultAdd(matshellctx->MpTrans, matshellctx->ff, yy, zz));
 66:   PetscFunctionReturn(PETSC_SUCCESS);
 67: }

 69: PetscErrorCode createSwarm(const DM dm, PetscInt dim, DM *sw)
 70: {
 71:   PetscInt Nc = 1;

 73:   PetscFunctionBeginUser;
 74:   PetscCall(DMCreate(PETSC_COMM_SELF, sw));
 75:   PetscCall(DMSetType(*sw, DMSWARM));
 76:   PetscCall(DMSetDimension(*sw, dim));
 77:   PetscCall(DMSwarmSetType(*sw, DMSWARM_PIC));
 78:   PetscCall(DMSwarmSetCellDM(*sw, dm));
 79:   PetscCall(DMSwarmRegisterPetscDatatypeField(*sw, "w_q", Nc, PETSC_REAL));
 80:   PetscCall(DMSwarmFinalizeFieldRegister(*sw));
 81:   PetscCall(DMSetFromOptions(*sw));
 82:   PetscCall(PetscObjectSetName((PetscObject)*sw, "Particle Grid"));
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }

 86: static PetscErrorCode makeSwarm(DM sw, const PetscInt dim, const PetscInt Np, const PetscReal xx[], const PetscReal yy[], const PetscReal zz[])
 87: {
 88:   PetscReal    *coords;
 89:   PetscDataType dtype;
 90:   PetscInt      bs, p, zero = 0;

 92:   PetscFunctionBeginUser;
 93:   PetscCall(DMSwarmSetLocalSizes(sw, Np, zero));
 94:   PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
 95:   for (p = 0; p < Np; p++) {
 96:     coords[p * dim + 0] = xx[p];
 97:     coords[p * dim + 1] = yy[p];
 98:     if (dim == 3) coords[p * dim + 2] = zz[p];
 99:   }
100:   PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
101:   PetscFunctionReturn(PETSC_SUCCESS);
102: }

104: static PetscErrorCode createMp(const DM dm, DM sw, Mat *Mp_out)
105: {
106:   PetscBool removePoints = PETSC_TRUE;
107:   Mat       M_p;

109:   PetscFunctionBeginUser;
110:   // migrate after coords are set
111:   PetscCall(DMSwarmMigrate(sw, removePoints));
112:   PetscCall(PetscObjectSetName((PetscObject)sw, "Particle Grid"));
113:   /* This gives M f = \int_\Omega \phi f, which looks like a rhs for a PDE */
114:   PetscCall(DMCreateMassMatrix(sw, dm, &M_p));
115:   PetscCall(DMViewFromOptions(sw, NULL, "-ex30_sw_view"));
116:   // output
117:   *Mp_out = M_p;
118:   PetscFunctionReturn(PETSC_SUCCESS);
119: }
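
     //
     // particlesToGrid: deposit the particle weights 'a_wp' onto the grid; the weights are
     // copied into the swarm field 'w_q' and M_p^T is applied, giving rho = M_p^T f (a
     // weak-form right-hand side). The caller still solves with the grid mass matrix to
     // recover FE coefficients.
     //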

121: static PetscErrorCode particlesToGrid(const DM dm, DM sw, const PetscInt a_tid, const PetscInt dim, const PetscReal a_wp[], Vec rho, Mat M_p)
122: {
123:   PetscReal    *wq;
124:   PetscDataType dtype;
125:   Vec           ff;
126:   PetscInt      bs, p, Np;

128:   PetscFunctionBeginUser;
129:   PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wq));
130:   PetscCall(DMSwarmGetLocalSize(sw, &Np));
131:   for (p = 0; p < Np; p++) wq[p] = a_wp[p];
132:   PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wq));
133:   PetscCall(PetscObjectSetName((PetscObject)rho, "rho"));
134:   PetscCall(DMSwarmCreateGlobalVectorFromField(sw, "w_q", &ff));
135:   PetscCall(PetscObjectSetName((PetscObject)ff, "weights"));
136:   PetscCall(MatMultTranspose(M_p, ff, rho));
137:   PetscCall(DMSwarmDestroyGlobalVectorFromField(sw, "w_q", &ff));
138:   PetscFunctionReturn(PETSC_SUCCESS);
139: }

141: //
142: // project the grid field 'rhs' onto the particles and store the result in the swarm field 'sw.w_q'
143: //
144: PetscErrorCode gridToParticles(const DM dm, DM sw, const Vec rhs, Vec work, Mat M_p, Mat Mass)
145: {
146:   PetscBool    is_lsqr;
147:   KSP          ksp;
148:   Mat          PM_p = NULL, MtM, D;
149:   Vec          ff;
150:   PetscInt     N, M, nzl;
151:   MatShellCtx *matshellctx;
152:   PC           pc;

154:   PetscFunctionBeginUser;
155:   // (Mp^T Mp)^-1 M
156:   PetscCall(MatMult(Mass, rhs, work));
157:   // pseudo-inverse
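       // Two paths: with at least as many particles as grid DoFs (M >= N), solve the normal
       // equations (M_p^T M_p) u = M rhs with the 'ftop_' KSP (CG/Jacobi by default) through
       // the MATSHELL below and set the particle weights w = M_p u; otherwise, or when
       // -ftop_ksp_type lsqr is given, solve M_p^T w = M rhs directly with LSQR
       // (KSPSolveTranspose).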
158:   PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
159:   PetscCall(KSPSetType(ksp, KSPCG));
160:   PetscCall(KSPGetPC(ksp, &pc));
161:   PetscCall(PCSetType(pc, PCJACOBI));
162:   PetscCall(KSPSetOptionsPrefix(ksp, "ftop_"));
163:   PetscCall(KSPSetFromOptions(ksp));
164:   PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPLSQR, &is_lsqr));
165:   if (!is_lsqr) {
166:     PetscCall(MatGetLocalSize(M_p, &M, &N));
167:     if (N > M) {
 168:       PetscCall(PetscInfo(ksp, " M (%" PetscInt_FMT ") < N (%" PetscInt_FMT "): more grid vertices than particles, revert to lsqr\n", M, N));
169:       is_lsqr = PETSC_TRUE;
170:       PetscCall(KSPSetType(ksp, KSPLSQR));
171:       PetscCall(PCSetType(pc, PCNONE)); // should not happen, but could solve stable (Mp Mp^T), move projection Mp before solve
172:     } else {
173:       PetscCall(PetscNew(&matshellctx));
174:       PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm), N, N, PETSC_DECIDE, PETSC_DECIDE, matshellctx, &MtM));
175:       PetscCall(MatTranspose(M_p, MAT_INITIAL_MATRIX, &matshellctx->MpTrans));
176:       matshellctx->Mp = M_p;
177:       PetscCall(MatShellSetOperation(MtM, MATOP_MULT, (void (*)(void))MatMultMtM_SeqAIJ));
178:       PetscCall(MatShellSetOperation(MtM, MATOP_MULT_ADD, (void (*)(void))MatMultAddMtM_SeqAIJ));
179:       PetscCall(MatCreateVecs(M_p, &matshellctx->uu, &matshellctx->ff));
180:       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, N, N, 1, NULL, &D));
181:       PetscCall(MatViewFromOptions(matshellctx->MpTrans, NULL, "-ftop2_Mp_mat_view"));
182:       for (int i = 0; i < N; i++) {
183:         const PetscScalar *vals;
184:         const PetscInt    *cols;
185:         PetscScalar        dot = 0;
186:         PetscCall(MatGetRow(matshellctx->MpTrans, i, &nzl, &cols, &vals));
187:         for (int ii = 0; ii < nzl; ii++) dot += PetscSqr(vals[ii]);
188:         if (dot == 0.0) dot = 1; // empty rows
189:         PetscCall(MatSetValue(D, i, i, dot, INSERT_VALUES));
190:       }
191:       PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY));
192:       PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY));
193:       PetscCall(PetscInfo(M_p, "createMtMKSP Have %" PetscInt_FMT " eqs, nzl = %" PetscInt_FMT "\n", N, nzl));
194:       PetscCall(KSPSetOperators(ksp, MtM, D));
195:       PetscCall(MatViewFromOptions(D, NULL, "-ftop2_D_mat_view"));
196:       PetscCall(MatViewFromOptions(M_p, NULL, "-ftop2_Mp_mat_view"));
197:       PetscCall(MatViewFromOptions(matshellctx->MpTrans, NULL, "-ftop2_MpTranspose_mat_view"));
198:       PetscCall(MatViewFromOptions(MtM, NULL, "-ftop2_MtM_mat_view"));
199:     }
200:   }
201:   if (is_lsqr) {
202:     PC        pc;
203:     PetscBool is_bjac;
204:     PetscCall(KSPGetPC(ksp, &pc));
205:     PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCBJACOBI, &is_bjac));
206:     if (is_bjac) {
207:       PetscCall(DMSwarmCreateMassMatrixSquare(sw, dm, &PM_p));
208:       PetscCall(KSPSetOperators(ksp, M_p, PM_p));
209:     } else {
210:       PetscCall(KSPSetOperators(ksp, M_p, M_p));
211:     }
212:   }
213:   PetscCall(DMSwarmCreateGlobalVectorFromField(sw, "w_q", &ff)); // this grabs access
214:   if (!is_lsqr) {
215:     PetscCall(KSPSolve(ksp, work, matshellctx->uu));
216:     PetscCall(MatMult(M_p, matshellctx->uu, ff));
217:     PetscCall(MatDestroy(&matshellctx->MpTrans));
218:     PetscCall(VecDestroy(&matshellctx->ff));
219:     PetscCall(VecDestroy(&matshellctx->uu));
220:     PetscCall(MatDestroy(&D));
221:     PetscCall(MatDestroy(&MtM));
222:     PetscCall(PetscFree(matshellctx));
223:   } else {
224:     PetscCall(KSPSolveTranspose(ksp, work, ff));
225:   }
226:   PetscCall(KSPDestroy(&ksp));
227:   PetscCall(MatDestroy(&PM_p));
228:   PetscCall(DMSwarmDestroyGlobalVectorFromField(sw, "w_q", &ff));
229:   PetscFunctionReturn(PETSC_SUCCESS);
230: }

232: #define EX30_MAX_NUM_THRDS 12
233: #define EX30_MAX_BATCH_SZ  1024
234: //
235: // project the batched grid solutions back onto the particles ('globSwarmArray[].w_q') and accumulate diagnostic moments for the target vertex
236: //
237: PetscErrorCode gridToParticles_private(DM grid_dm[], DM globSwarmArray[], const PetscInt dim, const PetscInt v_target, const PetscInt numthreads, const PetscInt num_vertices, const PetscInt global_vertex_id, Mat globMpArray[], Mat g_Mass[], Vec t_fhat[][EX30_MAX_NUM_THRDS], PetscReal moments[], Vec globXArray[], LandauCtx *ctx)
238: {
239:   PetscErrorCode ierr = (PetscErrorCode)0; // used inside the threaded loops

241:   PetscFunctionBeginUser;
242:   // map back to particles
243:   for (PetscInt v_id_0 = 0; v_id_0 < ctx->batch_sz; v_id_0 += numthreads) {
244:     PetscCall(PetscInfo(grid_dm[0], "g2p: global batch %" PetscInt_FMT " of %" PetscInt_FMT ", Landau batch %" PetscInt_FMT " of %" PetscInt_FMT ": map back to particles\n", global_vertex_id + 1, num_vertices, v_id_0 + 1, ctx->batch_sz));
245:     //PetscPragmaOMP(parallel for)
246:     for (int tid = 0; tid < numthreads; tid++) {
247:       const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id + v_id;
248:       if (glb_v_id < num_vertices) {
 249:         for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // add same particles for all grids
250:           PetscErrorCode ierr_t;
251:           ierr_t = PetscInfo(grid_dm[0], "gridToParticles: global batch %" PetscInt_FMT ", local batch b=%" PetscInt_FMT ", grid g=%" PetscInt_FMT ", index(b,g) %" PetscInt_FMT "\n", global_vertex_id, v_id, grid, LAND_PACK_IDX(v_id, grid));
252:           ierr_t = gridToParticles(grid_dm[grid], globSwarmArray[LAND_PACK_IDX(v_id, grid)], globXArray[LAND_PACK_IDX(v_id, grid)], t_fhat[grid][tid], globMpArray[LAND_PACK_IDX(v_id, grid)], g_Mass[grid]);
253:           if (ierr_t) ierr = ierr_t;
254:         }
255:       }
256:     }
257:     PetscCheck(!ierr, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Error in OMP loop. ierr = %d", (int)ierr);
258:     /* Get moments */
259:     PetscCall(PetscInfo(grid_dm[0], "Cleanup batches %" PetscInt_FMT " to %" PetscInt_FMT "\n", v_id_0, v_id_0 + numthreads));
260:     for (int tid = 0; tid < numthreads; tid++) {
261:       const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id + v_id;
262:       if (glb_v_id == v_target) {
263:         for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
264:           PetscDataType dtype;
265:           PetscReal    *wp, *coords;
266:           DM            sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
267:           PetscInt      npoints, bs = 1;
268:           PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp)); // take data out here
269:           PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
270:           PetscCall(DMSwarmGetLocalSize(sw, &npoints));
271:           for (int p = 0; p < npoints; p++) {
272:             PetscReal v2 = 0, fact = (dim == 2) ? 2.0 * PETSC_PI * coords[p * dim + 0] : 1, w = fact * wp[p] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]];
273:             for (int i = 0; i < dim; ++i) v2 += PetscSqr(coords[p * dim + i]);
274:             moments[0] += w;
275:             moments[1] += w * ctx->v_0 * coords[p * dim + 1]; // z-momentum
276:             moments[2] += w * 0.5 * ctx->v_0 * ctx->v_0 * v2;
277:           }
278:           PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
279:           PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
280:         }
281:         const PetscReal N_inv = 1 / moments[0];
282:         PetscCall(PetscInfo(grid_dm[0], "gridToParticles_private [%" PetscInt_FMT "], n = %g\n", v_id, (double)moments[0]));
283:         for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
284:           PetscDataType dtype;
285:           PetscReal    *wp, *coords;
286:           DM            sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
287:           PetscInt      npoints, bs = 1;
288:           PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp)); // take data out here
289:           PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
290:           PetscCall(DMSwarmGetLocalSize(sw, &npoints));
291:           for (int p = 0; p < npoints; p++) {
292:             const PetscReal fact = dim == 2 ? 2.0 * PETSC_PI * coords[p * dim + 0] : 1, w = fact * wp[p] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]], ww = w * N_inv;
293:             if (w > PETSC_REAL_MIN) {
294:               moments[3] -= ww * PetscLogReal(ww);
295:               PetscCheck(ww < 1 - PETSC_MACHINE_EPSILON, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "ww (%g) > 1", (double)ww);
296:             } else moments[4] -= w;
297:           }
298:           PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
299:           PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
300:         }
301:       }
302:     } // thread batch
303:   } // batch
304:   PetscFunctionReturn(PETSC_SUCCESS);
305: }
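
     // maxwellian: accumulate n * (pi*theta)^{-3/2} * exp(-v^2/theta) into u[0], with
     // theta = 2*kt_m and an optional drift 'shift' applied in the last coordinate
     // (note the -3/2 power is used regardless of 'dim').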

307: static void maxwellian(PetscInt dim, const PetscReal x[], PetscReal kt_m, PetscReal n, PetscReal shift, PetscScalar *u)
308: {
309:   PetscInt  i;
310:   PetscReal v2 = 0, theta = 2.0 * kt_m; /* theta = 2kT/mc^2 */

312:   if (shift != 0.) {
313:     v2 = 0;
314:     for (i = 0; i < dim - 1; ++i) v2 += x[i] * x[i];
315:     v2 += (x[dim - 1] - shift) * (x[dim - 1] - shift);
316:     /* evaluate the shifted Maxwellian */
317:     u[0] += n * PetscPowReal(PETSC_PI * theta, -1.5) * (PetscExpReal(-v2 / theta));
318:   } else {
319:     /* compute the exponents, v^2 */
320:     for (i = 0; i < dim; ++i) v2 += x[i] * x[i];
321:     /* evaluate the Maxwellian */
322:     u[0] += n * PetscPowReal(PETSC_PI * theta, -1.5) * (PetscExpReal(-v2 / theta));
323:   }
324: }
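
     // PostStep: TS post-step hook (registered with TSSetPostStep() in go()). For the target
     // vertex it projects the current grid solution back onto the particles and logs density,
     // z-momentum, energy, and entropy via PetscInfo; it also writes periodic solution views
     // when -ex30_vec_view is given.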

326: static PetscErrorCode PostStep(TS ts)
327: {
328:   PetscInt   n, dim, nDMs, v_id;
329:   PetscReal  t;
330:   LandauCtx *ctx;
331:   Vec        X;
332:   PrintCtx  *printCtx;
333:   DM         pack;
334:   PetscReal  moments[5], e_grid[LANDAU_MAX_GRIDS];

336:   PetscFunctionBeginUser;
337:   PetscCall(TSGetApplicationContext(ts, &printCtx));
338:   if (!printCtx->print && !printCtx->print_entropy) PetscFunctionReturn(PETSC_SUCCESS);
339:   ctx = printCtx->ctx;
340:   if (printCtx->v_target < printCtx->global_vertex_id_0 || printCtx->v_target >= printCtx->global_vertex_id_0 + ctx->batch_sz) PetscFunctionReturn(PETSC_SUCCESS);
341:   for (int i = 0; i < 5; i++) moments[i] = 0;
342:   for (int i = 0; i < LANDAU_MAX_GRIDS; i++) e_grid[i] = 0;
343:   v_id = printCtx->v_target % ctx->batch_sz;
344:   PetscCall(TSGetDM(ts, &pack));
345:   PetscCall(DMGetDimension(pack, &dim));
346:   PetscCall(DMCompositeGetNumberDM(pack, &nDMs)); // number of vertices * number of grids
347:   PetscCall(TSGetSolution(ts, &X));
348:   PetscCall(TSGetStepNumber(ts, &n));
349:   PetscCall(TSGetTime(ts, &t));
350:   PetscCall(DMCompositeGetAccessArray(pack, X, nDMs, NULL, printCtx->globXArray));
351:   if (printCtx->print_entropy && printCtx->v_target >= 0) {
352:     for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
353:       PetscDataType dtype;
354:       PetscReal    *wp, *coords;
355:       DM            sw = printCtx->globSwarmArray[LAND_PACK_IDX(v_id, grid)];
356:       Vec           work, subX = printCtx->globXArray[LAND_PACK_IDX(v_id, grid)];
357:       PetscInt      bs, NN;
358:       // C-G moments
359:       PetscCall(VecDuplicate(subX, &work));
360:       PetscCall(gridToParticles(printCtx->grid_dm[grid], sw, subX, work, printCtx->globMpArray[LAND_PACK_IDX(v_id, grid)], printCtx->g_Mass[grid]));
361:       PetscCall(VecDestroy(&work));
362:       // moments
363:       PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
364:       PetscCall(DMSwarmGetLocalSize(sw, &NN));
365:       PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp));
366:       for (int pp = 0; pp < NN; pp++) {
367:         PetscReal v2 = 0, fact = (dim == 2) ? 2.0 * PETSC_PI * coords[pp * dim + 0] : 1, w = fact * wp[pp] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]];
368:         for (int i = 0; i < dim; ++i) v2 += PetscSqr(coords[pp * dim + i]);
369:         moments[0] += w;
370:         moments[1] += w * ctx->v_0 * coords[pp * dim + 1]; // z-momentum
371:         moments[2] += w * 0.5 * ctx->v_0 * ctx->v_0 * v2;
372:         e_grid[grid] += w * 0.5 * ctx->v_0 * ctx->v_0 * v2;
373:       }
374:       PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
375:       PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
376:     }
377:     // entropy
378:     const PetscReal N_inv = 1 / moments[0];
379:     for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
380:       PetscDataType dtype;
381:       PetscReal    *wp, *coords;
382:       DM            sw = printCtx->globSwarmArray[LAND_PACK_IDX(v_id, grid)];
383:       PetscInt      bs, NN;
384:       PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
385:       PetscCall(DMSwarmGetLocalSize(sw, &NN));
386:       PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp));
387:       for (int pp = 0; pp < NN; pp++) {
388:         PetscReal fact = (dim == 2) ? 2.0 * PETSC_PI * coords[pp * dim + 0] : 1, w = fact * wp[pp] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]], ww = w * N_inv;
389:         if (w > PETSC_REAL_MIN) {
390:           moments[3] -= ww * PetscLogReal(ww);
391:           PetscCheck(ww < 1 - PETSC_MACHINE_EPSILON, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "ww (%g) > 1", (double)ww);
392:         } else moments[4] -= w;
393:       }
394:       PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
395:       PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
396:     }
397:     PetscCall(PetscInfo(X, "%4d) time %e, Landau particle moments: 0: %18.12e 1: %19.12e 2: %18.12e entropy: %e loss %e. energy = %e + %e + %e\n", (int)n, (double)t, (double)moments[0], (double)moments[1], (double)moments[2], (double)moments[3], (double)moments[4], (double)e_grid[0], (double)e_grid[1], (double)e_grid[2]));
398:   }
399:   if (printCtx->print && printCtx->g_target >= 0) {
400:     PetscInt         grid   = printCtx->g_target, id;
401:     static PetscReal last_t = -100000, period = .5;
402:     if (last_t == -100000) last_t = -period + t;
403:     if (t >= last_t + period) {
404:       last_t = t;
405:       PetscCall(DMGetOutputSequenceNumber(ctx->plex[grid], &id, NULL));
406:       PetscCall(DMSetOutputSequenceNumber(ctx->plex[grid], id + 1, t));
407:       PetscCall(VecViewFromOptions(printCtx->globXArray[LAND_PACK_IDX(v_id % ctx->batch_sz, grid)], NULL, "-ex30_vec_view"));
408:       if (ctx->num_grids > grid + 1) {
409:         PetscCall(DMSetOutputSequenceNumber(ctx->plex[grid + 1], id + 1, t));
410:         PetscCall(VecViewFromOptions(printCtx->globXArray[LAND_PACK_IDX(v_id % ctx->batch_sz, grid + 1)], NULL, "-ex30_vec_view2"));
411:       }
412:       PetscCall(PetscInfo(X, "%4d) time %e View\n", (int)n, (double)t));
413:     }
414:   }
415:   PetscCall(DMCompositeRestoreAccessArray(pack, X, nDMs, NULL, printCtx->globXArray));
416:   PetscFunctionReturn(PETSC_SUCCESS);
417: }
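
     // go: driver for one full PIC cycle over all spatial vertices, processed in batches of
     // ctx->batch_sz (optionally with OpenMP threads): sample particles per vertex and grid,
     // deposit them onto the velocity grids (particlesToGrid plus a mass-matrix solve),
     // advance the batched Landau system with TSSolve, then project back to particles and,
     // with -print_entropy, report moments and entropy for the target vertex.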

419: PetscErrorCode go(TS ts, Vec X, const PetscInt num_vertices, const PetscInt a_Np, const PetscInt dim, const PetscInt v_target, const PetscInt g_target, PetscReal shift, PetscBool use_uniform_particle_grid)
420: {
421:   DM             pack, *globSwarmArray, grid_dm[LANDAU_MAX_GRIDS];
422:   Mat           *globMpArray, g_Mass[LANDAU_MAX_GRIDS];
423:   KSP            t_ksp[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS];
424:   Vec            t_fhat[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS];
425:   PetscInt       nDMs;
 426:   PetscErrorCode ierr = (PetscErrorCode)0; // used inside the threaded loops
427: #if defined(PETSC_HAVE_OPENMP) && defined(PETSC_HAVE_THREADSAFETY)
428:   PetscInt numthreads = PetscNumOMPThreads;
429: #else
430:   PetscInt numthreads = 1;
431: #endif
432:   LandauCtx *ctx;
433:   Vec       *globXArray;
434:   PetscReal  moments_0[5], moments_1a[5], moments_1b[5], dt_init;
435:   PrintCtx  *printCtx;

437:   PetscFunctionBeginUser;
438:   PetscCheck(numthreads <= EX30_MAX_NUM_THRDS, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Too many threads %" PetscInt_FMT " > %d", numthreads, EX30_MAX_NUM_THRDS);
 439:   PetscCheck(numthreads > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Number of threads %" PetscInt_FMT " must be positive", numthreads);
440:   PetscCall(TSGetDM(ts, &pack));
441:   PetscCall(DMGetApplicationContext(pack, &ctx));
442:   PetscCheck(ctx->batch_sz % numthreads == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "batch size (-dm_landau_batch_size) %" PetscInt_FMT "  mod #threads %" PetscInt_FMT " must equal zero", ctx->batch_sz, numthreads);
443:   PetscCall(DMCompositeGetNumberDM(pack, &nDMs)); // number of vertices * number of grids
 444:   PetscCall(PetscInfo(pack, "Have %" PetscInt_FMT " total grids, Landau batch size %" PetscInt_FMT ", %" PetscInt_FMT " global vertices, and %d DMs\n", ctx->num_grids, ctx->batch_sz, num_vertices, (int)nDMs));
445:   PetscCall(PetscMalloc(sizeof(*globXArray) * nDMs, &globXArray));
446:   PetscCall(PetscMalloc(sizeof(*globMpArray) * nDMs, &globMpArray));
447:   PetscCall(PetscMalloc(sizeof(*globSwarmArray) * nDMs, &globSwarmArray));
448:   // print ctx
449:   PetscCall(PetscNew(&printCtx));
450:   PetscCall(TSSetApplicationContext(ts, printCtx));
451:   printCtx->v_target       = v_target;
452:   printCtx->g_target       = g_target;
453:   printCtx->ctx            = ctx;
454:   printCtx->globSwarmArray = globSwarmArray;
455:   printCtx->grid_dm        = grid_dm;
456:   printCtx->globMpArray    = globMpArray;
457:   printCtx->g_Mass         = g_Mass;
458:   printCtx->globXArray     = globXArray;
459:   printCtx->print_entropy  = PETSC_FALSE;
460:   PetscOptionsBegin(PETSC_COMM_SELF, "", "Print Options", "DMPLEX");
461:   PetscCall(PetscOptionsBool("-print_entropy", "Print entropy and moments at each time step", "ex30.c", printCtx->print_entropy, &printCtx->print_entropy, NULL));
462:   PetscOptionsEnd();
463:   // view
464:   PetscCall(DMViewFromOptions(ctx->plex[g_target], NULL, "-ex30_dm_view"));
465:   if (ctx->num_grids > g_target + 1) { PetscCall(DMViewFromOptions(ctx->plex[g_target + 1], NULL, "-ex30_dm_view2")); }
466:   // create mesh mass matrices
467:   PetscCall(VecZeroEntries(X));
468:   PetscCall(DMCompositeGetAccessArray(pack, X, nDMs, NULL, globXArray)); // just to duplicate
 469:   for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {               // per-grid setup: mass matrix and particle-to-grid projection solvers
470:     Vec          subX = globXArray[LAND_PACK_IDX(0, grid)];
471:     DM           dm   = ctx->plex[grid];
472:     PetscSection s;
473:     grid_dm[grid] = dm;
474:     PetscCall(DMCreateMassMatrix(dm, dm, &g_Mass[grid]));
475:     //
476:     PetscCall(DMGetLocalSection(dm, &s));
477:     PetscCall(DMPlexCreateClosureIndex(dm, s));
478:     for (int tid = 0; tid < numthreads; tid++) {
479:       PC pc;
480:       PetscCall(VecDuplicate(subX, &t_fhat[grid][tid]));
481:       PetscCall(KSPCreate(PETSC_COMM_SELF, &t_ksp[grid][tid]));
482:       PetscCall(KSPSetType(t_ksp[grid][tid], KSPCG));
483:       PetscCall(KSPGetPC(t_ksp[grid][tid], &pc));
484:       PetscCall(PCSetType(pc, PCJACOBI));
485:       PetscCall(KSPSetOptionsPrefix(t_ksp[grid][tid], "ptof_"));
486:       PetscCall(KSPSetOperators(t_ksp[grid][tid], g_Mass[grid], g_Mass[grid]));
487:       PetscCall(KSPSetFromOptions(t_ksp[grid][tid]));
488:     }
489:   }
490:   PetscCall(DMCompositeRestoreAccessArray(pack, X, nDMs, NULL, globXArray));
491:   PetscCall(TSGetTimeStep(ts, &dt_init)); // we could have an adaptive time stepper
 492:   // loop over all vertices in chunks that are batched for TSSolve
493:   for (int i = 0; i < 5; i++) moments_0[i] = moments_1a[i] = moments_1b[i] = 0;
494:   for (PetscInt global_vertex_id_0 = 0; global_vertex_id_0 < num_vertices; global_vertex_id_0 += ctx->batch_sz, shift /= 2) { // outer vertex loop
495:     PetscCall(TSSetTime(ts, 0));
496:     PetscCall(TSSetStepNumber(ts, 0));
497:     PetscCall(TSSetTimeStep(ts, dt_init));
498:     PetscCall(DMCompositeGetAccessArray(pack, X, nDMs, NULL, globXArray));
499:     printCtx->global_vertex_id_0 = global_vertex_id_0;
500:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz) {
501:       PetscCall(PetscObjectSetName((PetscObject)globXArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target)], "rho"));
502:       printCtx->print = PETSC_TRUE;
503:     } else printCtx->print = PETSC_FALSE;
504:     // create fake particles in batches with threads
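         // Two sampling modes per vertex and grid: a uniform particle lattice weighted by a
         // (possibly shifted) Maxwellian when use_uniform_particle_grid is true, or randomly
         // placed Gaussian-distributed particles with equal weights drawn with a Box-Muller
         // transform otherwise.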
505:     for (PetscInt v_id_0 = 0; v_id_0 < ctx->batch_sz; v_id_0 += numthreads) {
506:       PetscReal *xx_t[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS], *yy_t[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS], *zz_t[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS], *wp_t[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS];
507:       PetscInt   Np_t[LANDAU_MAX_GRIDS][EX30_MAX_NUM_THRDS];
508:       // make particles
509:       for (int tid = 0; tid < numthreads; tid++) {
510:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
511:         if (glb_v_id < num_vertices) {                                                                                                                                            // the ragged edge (in last batch)
 512:           PetscInt Npp0 = a_Np + (glb_v_id % (a_Np / 10 + 1)), nTargetP[LANDAU_MAX_GRIDS];                                                                                        // number of particles in each dim, with a small per-vertex load imbalance
 513:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {                                                                                                                // add same particles for all grids
514:             const PetscReal kT_m  = ctx->k * ctx->thermal_temps[ctx->species_offset[grid]] / ctx->masses[ctx->species_offset[grid]] / (ctx->v_0 * ctx->v_0);                      /* theta = 2kT/mc^2 per species */
515:             PetscReal       lo[3] = {-ctx->radius[grid], -ctx->radius[grid], -ctx->radius[grid]}, hi[3] = {ctx->radius[grid], ctx->radius[grid], ctx->radius[grid]}, hp[3], vole; // would be nice to get box from DM
516:             PetscInt        Npi = Npp0, Npj = 2 * Npp0, Npk = 1;
517:             PetscRandom     rand;
518:             PetscReal       sigma = ctx->thermal_speed[grid] / ctx->thermal_speed[0], p2_shift = grid == 0 ? shift : -shift;
519:             PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &rand));
520:             PetscCall(PetscRandomSetInterval(rand, 0., 1.));
521:             PetscCall(PetscRandomSetFromOptions(rand));
522:             if (dim == 2) lo[0] = 0; // Landau coordinate (r,z)
523:             else Npi = Npj = Npk = Npp0;
524:             // User: use glb_v_id to index into your data
 525:             const PetscInt NN = Npi * Npj * Npk; // make a regular grid of particles, Npi x Npj x Npk
526:             Np_t[grid][tid]   = NN;
527:             if (glb_v_id == v_target) nTargetP[grid] = NN;
528:             PetscCall(PetscMalloc4(NN, &xx_t[grid][tid], NN, &yy_t[grid][tid], NN, &wp_t[grid][tid], dim == 2 ? 1 : NN, &zz_t[grid][tid]));
529:             hp[0] = (hi[0] - lo[0]) / Npi;
530:             hp[1] = (hi[1] - lo[1]) / Npj;
531:             hp[2] = (hi[2] - lo[2]) / Npk;
532:             if (dim == 2) hp[2] = 1;
533:             PetscCall(PetscInfo(pack, " lo = %14.7e, hi = %14.7e; hp = %14.7e, %14.7e; kT_m = %g; \n", (double)lo[1], (double)hi[1], (double)hp[0], (double)hp[1], (double)kT_m)); // temp
534:             vole = hp[0] * hp[1] * hp[2] * ctx->n[grid];                                                                                                                           // fix for multi-species
535:             PetscCall(PetscInfo(pack, "Vertex %" PetscInt_FMT ", grid %" PetscInt_FMT " with %" PetscInt_FMT " particles (diagnostic target = %" PetscInt_FMT ")\n", glb_v_id, grid, NN, v_target));
536:             for (int pj = 0, pp = 0; pj < Npj; pj++) {
537:               for (int pk = 0; pk < Npk; pk++) {
538:                 for (int pi = 0; pi < Npi; pi++, pp++) {
539:                   PetscReal p_shift   = p2_shift;
540:                   wp_t[grid][tid][pp] = 0;
541:                   if (use_uniform_particle_grid) {
542:                     xx_t[grid][tid][pp] = lo[0] + hp[0] / 2.0 + pi * hp[0];
543:                     yy_t[grid][tid][pp] = lo[1] + hp[1] / 2.0 + pj * hp[1];
544:                     if (dim == 3) zz_t[grid][tid][pp] = lo[2] + hp[2] / 2.0 + pk * hp[2];
545:                     PetscReal x[] = {xx_t[grid][tid][pp], yy_t[grid][tid][pp], dim == 2 ? 0 : zz_t[grid][tid][pp]};
546:                     p_shift *= ctx->thermal_speed[grid] / ctx->v_0;
547:                     maxwellian(dim, x, kT_m, vole, p_shift, &wp_t[grid][tid][pp]);
548:                     if (ctx->num_grids == 1 && shift != 0) { // bi-maxwellian, electron plasma
549:                       maxwellian(dim, x, kT_m, vole, -p_shift, &wp_t[grid][tid][pp]);
550:                     }
551:                   } else {
552:                     PetscReal u1, u2;
553:                     do {
554:                       do {
555:                         PetscCall(PetscRandomGetValueReal(rand, &u1));
556:                       } while (u1 == 0);
557:                       PetscCall(PetscRandomGetValueReal(rand, &u2));
 558:                       // Box-Muller transform: map uniform (u1, u2) to Gaussian deviates
559:                       PetscReal mag       = sigma * PetscSqrtReal(-2.0 * PetscLogReal(u1));
560:                       xx_t[grid][tid][pp] = mag * PetscCosReal(2.0 * PETSC_PI * u2);
561:                       yy_t[grid][tid][pp] = mag * PetscSinReal(2.0 * PETSC_PI * u2);
562:                       if (dim == 2 && xx_t[grid][tid][pp] < lo[0]) xx_t[grid][tid][pp] = -xx_t[grid][tid][pp];
563:                       if (dim == 3) zz_t[grid][tid][pp] = lo[2] + hp[2] / 2.0 + pk * hp[2];
564:                       if (ctx->num_grids == 1 && pp % 2 == 0) p_shift = 0; // one species, split bi-max
565:                       p_shift *= ctx->thermal_speed[grid] / ctx->v_0;
566:                       if (dim == 3) zz_t[grid][tid][pp] += p_shift;
567:                       else yy_t[grid][tid][pp] += p_shift;
568:                       wp_t[grid][tid][pp] += ctx->n[grid] / NN * PetscSqrtReal(ctx->masses[ctx->species_offset[grid]] / ctx->masses[0]);
569:                       if (p_shift <= 0) break; // add bi-max for electron plasma only
570:                       p_shift = -p_shift;
571:                     } while (ctx->num_grids == 1); // add bi-max for electron plasma only
572:                   }
573:                   {
574:                     if (glb_v_id == v_target) {
575:                       PetscReal x[] = {xx_t[grid][tid][pp], yy_t[grid][tid][pp], dim == 2 ? 0 : zz_t[grid][tid][pp]};
576:                       PetscReal v2 = 0, fact = dim == 2 ? 2.0 * PETSC_PI * x[0] : 1, w = fact * wp_t[grid][tid][pp] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]];
577:                       for (int i = 0; i < dim; ++i) v2 += PetscSqr(x[i]);
578:                       moments_0[0] += w;                   // not thread safe
579:                       moments_0[1] += w * ctx->v_0 * x[1]; // z-momentum
580:                       moments_0[2] += w * 0.5 * ctx->v_0 * ctx->v_0 * v2;
581:                     }
582:                   }
583:                 }
584:               }
585:             }
586:             PetscCall(PetscRandomDestroy(&rand));
587:           }
588:           // entropy init, need global n
589:           if (glb_v_id == v_target) {
590:             const PetscReal N_inv = 1 / moments_0[0];
 591:             PetscCall(PetscInfo(pack, "Target %" PetscInt_FMT " with %" PetscInt_FMT " particles\n", glb_v_id, nTargetP[0]));
592:             for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
593:               const PetscInt NN = nTargetP[grid];
594:               for (int pp = 0; pp < NN; pp++) {
595:                 const PetscReal fact = dim == 2 ? 2.0 * PETSC_PI * xx_t[grid][tid][pp] : 1, w = fact * ctx->n_0 * ctx->masses[ctx->species_offset[grid]] * wp_t[grid][tid][pp], ww = w * N_inv;
596:                 if (w > PETSC_REAL_MIN) {
597:                   moments_0[3] -= ww * PetscLogReal(ww);
598:                   PetscCheck(ww < 1 - PETSC_MACHINE_EPSILON, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "ww (%g) > 1", (double)ww);
599:                 } else moments_0[4] -= w;
600:               }
601:             } // grid
602:           } // target
603:         } // active
604:       } // threads
605:       /* Create particle swarm */
606:       for (int tid = 0; tid < numthreads; tid++) {
607:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
608:         if (glb_v_id < num_vertices) {                             // the ragged edge of the last batch
 609:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // add same particles for all grids
610:             PetscErrorCode ierr_t;
611:             PetscSection   section;
612:             PetscInt       Nf;
613:             DM             dm = grid_dm[grid];
614:             ierr_t            = DMGetLocalSection(dm, &section);
615:             ierr_t            = PetscSectionGetNumFields(section, &Nf);
616:             if (Nf != 1) ierr_t = (PetscErrorCode)9999;
617:             else {
618:               ierr_t = DMViewFromOptions(dm, NULL, "-dm_view");
619:               ierr_t = PetscInfo(pack, "call createSwarm [%" PetscInt_FMT ".%" PetscInt_FMT "] local block index %" PetscInt_FMT "\n", v_id, grid, LAND_PACK_IDX(v_id, grid));
620:               ierr_t = createSwarm(dm, dim, &globSwarmArray[LAND_PACK_IDX(v_id, grid)]);
621:             }
622:             if (ierr_t) ierr = ierr_t;
623:           }
624:         } // active
625:       } // threads
626:       PetscCheck(ierr != 9999, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Only support one species per grid");
627:       PetscCheck(!ierr, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Error in OMP loop. ierr = %d", (int)ierr);
628:       // make globMpArray
629:       PetscPragmaOMP(parallel for)
630:       for (int tid = 0; tid < numthreads; tid++) {
631:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
632:         if (glb_v_id < num_vertices) {
 633:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // add same particles for all grids
634:             PetscErrorCode ierr_t;
635:             DM             sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
636:             ierr_t            = PetscInfo(pack, "makeSwarm %" PetscInt_FMT ".%" PetscInt_FMT ") for block %" PetscInt_FMT "\n", v_id, grid, LAND_PACK_IDX(v_id, grid));
637:             ierr_t            = makeSwarm(sw, dim, Np_t[grid][tid], xx_t[grid][tid], yy_t[grid][tid], zz_t[grid][tid]);
638:             if (ierr_t) ierr = ierr_t;
639:           }
640:         }
641:       }
642:       //PetscPragmaOMP(parallel for)
643:       for (int tid = 0; tid < numthreads; tid++) {
644:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
645:         if (glb_v_id < num_vertices) {
 646:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // add same particles for all grids
647:             PetscErrorCode ierr_t;
648:             DM             dm = grid_dm[grid];
649:             DM             sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
650:             ierr_t            = PetscInfo(pack, "createMp %" PetscInt_FMT ".%" PetscInt_FMT ") for block %" PetscInt_FMT "\n", v_id, grid, LAND_PACK_IDX(v_id, grid));
651:             ierr_t            = createMp(dm, sw, &globMpArray[LAND_PACK_IDX(v_id, grid)]);
652:             if (ierr_t) ierr = ierr_t;
653:           }
654:         }
655:       }
656:       PetscCheck(!ierr, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Error in OMP loop. ierr = %d", (int)ierr);
657:       // p --> g: set X
658:       // PetscPragmaOMP(parallel for)
659:       for (int tid = 0; tid < numthreads; tid++) {
660:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
661:         if (glb_v_id < num_vertices) {
 662:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // add same particles for all grids
663:             PetscErrorCode ierr_t;
664:             DM             dm   = grid_dm[grid];
665:             DM             sw   = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
666:             Vec            subX = globXArray[LAND_PACK_IDX(v_id, grid)], work = t_fhat[grid][tid];
667:             ierr_t = PetscInfo(pack, "particlesToGrid %" PetscInt_FMT ".%" PetscInt_FMT ") for block %" PetscInt_FMT "\n", v_id, grid, LAND_PACK_IDX(v_id, grid));
668:             ierr_t = particlesToGrid(dm, sw, tid, dim, wp_t[grid][tid], subX, globMpArray[LAND_PACK_IDX(v_id, grid)]);
669:             if (ierr_t) ierr = ierr_t;
 670:             // u = M^-1 f_w
671:             ierr_t = VecCopy(subX, work);
672:             ierr_t = KSPSolve(t_ksp[grid][tid], work, subX);
673:             if (ierr_t) ierr = ierr_t;
674:           }
675:         }
676:       }
677:       PetscCheck(!ierr, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Error in OMP loop. ierr = %d", (int)ierr);
678:       /* Cleanup */
679:       for (int tid = 0; tid < numthreads; tid++) {
680:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
681:         if (glb_v_id < num_vertices) {
 682:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // free the per-grid particle arrays
683:             PetscCall(PetscFree4(xx_t[grid][tid], yy_t[grid][tid], wp_t[grid][tid], zz_t[grid][tid]));
684:           }
685:         } // active
686:       } // threads
687:     } // (fake) particle loop
688:     // standard view of initial conditions
689:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz) {
690:       PetscCall(DMSetOutputSequenceNumber(ctx->plex[g_target], 0, 0.0));
691:       PetscCall(VecViewFromOptions(globXArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target)], NULL, "-ex30_vec_view"));
692:       if (ctx->num_grids > g_target + 1) {
693:         PetscCall(DMSetOutputSequenceNumber(ctx->plex[g_target + 1], 0, 0.0));
694:         PetscCall(VecViewFromOptions(globXArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target + 1)], NULL, "-ex30_vec_view2"));
695:       }
696:     }
697:     // coarse graining moments
698:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz && printCtx->print_entropy) {
699:       const PetscInt v_id = v_target % ctx->batch_sz;
700:       for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
701:         PetscDataType dtype;
702:         PetscReal    *wp, *coords;
703:         DM            sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
704:         Vec           work, subX = globXArray[LAND_PACK_IDX(v_id, grid)];
705:         PetscInt      bs, NN;
706:         // C-G moments
707:         PetscCall(VecDuplicate(subX, &work));
708:         PetscCall(gridToParticles(grid_dm[grid], sw, subX, work, globMpArray[LAND_PACK_IDX(v_id, grid)], g_Mass[grid]));
709:         PetscCall(VecDestroy(&work));
710:         // moments
711:         PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
712:         PetscCall(DMSwarmGetLocalSize(sw, &NN));
713:         PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp));
714:         for (int pp = 0; pp < NN; pp++) {
715:           PetscReal v2 = 0, fact = (dim == 2) ? 2.0 * PETSC_PI * coords[pp * dim + 0] : 1, w = fact * wp[pp] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]];
716:           for (int i = 0; i < dim; ++i) v2 += PetscSqr(coords[pp * dim + i]);
717:           moments_1a[0] += w;
718:           moments_1a[1] += w * ctx->v_0 * coords[pp * dim + 1]; // z-momentum
719:           moments_1a[2] += w * 0.5 * ctx->v_0 * ctx->v_0 * v2;
720:         }
721:         PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
722:         PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
723:       }
724:       // entropy
725:       const PetscReal N_inv = 1 / moments_1a[0];
726:       PetscCall(PetscInfo(pack, "Entropy batch %" PetscInt_FMT " of %" PetscInt_FMT ", n = %g\n", v_target, num_vertices, (double)(1 / N_inv)));
727:       for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
728:         PetscDataType dtype;
729:         PetscReal    *wp, *coords;
730:         DM            sw = globSwarmArray[LAND_PACK_IDX(v_id, grid)];
731:         PetscInt      bs, NN;
732:         PetscCall(DMSwarmGetLocalSize(sw, &NN));
733:         PetscCall(DMSwarmGetField(sw, "w_q", &bs, &dtype, (void **)&wp));
734:         PetscCall(DMSwarmGetField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
735:         for (int pp = 0; pp < NN; pp++) {
736:           PetscReal fact = (dim == 2) ? 2.0 * PETSC_PI * coords[pp * dim + 0] : 1, w = fact * wp[pp] * ctx->n_0 * ctx->masses[ctx->species_offset[grid]], ww = w * N_inv;
737:           if (w > PETSC_REAL_MIN) {
738:             moments_1a[3] -= ww * PetscLogReal(ww);
739:             PetscCheck(ww < 1 - PETSC_MACHINE_EPSILON, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "ww (%g) > 1", (double)ww);
740:           } else moments_1a[4] -= w;
741:         }
742:         PetscCall(DMSwarmRestoreField(sw, "w_q", &bs, &dtype, (void **)&wp));
743:         PetscCall(DMSwarmRestoreField(sw, "DMSwarmPIC_coor", &bs, &dtype, (void **)&coords));
744:       }
745:     }
746:     // restore vector
747:     PetscCall(DMCompositeRestoreAccessArray(pack, X, nDMs, NULL, globXArray));
748:     // view
749:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz) { PetscCall(DMPlexLandauPrintNorms(X, 0)); }
750:     // advance
751:     PetscCall(TSSetSolution(ts, X));
752:     PetscCall(PetscInfo(pack, "Advance vertex %" PetscInt_FMT " to %" PetscInt_FMT "\n", global_vertex_id_0, global_vertex_id_0 + ctx->batch_sz));
753:     PetscCall(TSSetPostStep(ts, PostStep));
754:     PetscCall(PostStep(ts));
755:     PetscCall(TSSolve(ts, X));
756:     // view
757:     PetscCall(DMCompositeGetAccessArray(pack, X, nDMs, NULL, globXArray));
758:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz) {
759:       DM        sw = globSwarmArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target)];
760:       PetscInt  id;
761:       PetscReal t;
762:       PetscCall(DMPlexLandauPrintNorms(X, 1));
763:       PetscCall(TSGetTime(ts, &t));
764:       PetscCall(DMGetOutputSequenceNumber(ctx->plex[g_target], &id, NULL));
765:       PetscCall(DMSetOutputSequenceNumber(ctx->plex[g_target], id + 1, t));
766:       PetscCall(VecViewFromOptions(globXArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target)], NULL, "-ex30_vec_view"));
767:       if (ctx->num_grids > g_target + 1) {
768:         PetscCall(DMSetOutputSequenceNumber(ctx->plex[g_target + 1], id + 1, t));
769:         PetscCall(VecViewFromOptions(globXArray[LAND_PACK_IDX(v_target % ctx->batch_sz, g_target + 1)], NULL, "-ex30_vec_view2"));
770:       }
771:       /* Visualize particle field */
772:       Vec f;
773:       PetscCall(DMSetOutputSequenceNumber(sw, 0, 0.0));
774:       PetscCall(DMViewFromOptions(grid_dm[g_target], NULL, "-weights_dm_view"));
775:       PetscCall(DMViewFromOptions(sw, NULL, "-weights_sw_view"));
776:       PetscCall(DMSwarmCreateGlobalVectorFromField(sw, "w_q", &f));
777:       PetscCall(PetscObjectSetName((PetscObject)f, "weights"));
778:       PetscCall(VecViewFromOptions(f, NULL, "-weights_vec_view"));
779:       PetscCall(DMSwarmDestroyGlobalVectorFromField(sw, "w_q", &f));
780:     }
781:     // particles to grid, compute moments and entropy
782:     if (v_target >= global_vertex_id_0 && v_target < global_vertex_id_0 + ctx->batch_sz && printCtx->print_entropy) {
783:       PetscCall(gridToParticles_private(grid_dm, globSwarmArray, dim, v_target, numthreads, num_vertices, global_vertex_id_0, globMpArray, g_Mass, t_fhat, moments_1b, globXArray, ctx));
784:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Particle Moments:\t number density      momentum (par)     energy             entropy      loss : # OMP threads %g\n", (double)numthreads));
785:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tInitial:         %18.12e %19.12e %18.12e %e %e\n", (double)moments_0[0], (double)moments_0[1], (double)moments_0[2], (double)moments_0[3], (double)moments_0[4]));
786:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tCoarse-graining: %18.12e %19.12e %18.12e %e %e\n", (double)moments_1a[0], (double)moments_1a[1], (double)moments_1a[2], (double)moments_1a[3], (double)moments_1a[4]));
787:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tLandau:          %18.12e %19.12e %18.12e %e %e\n", (double)moments_1b[0], (double)moments_1b[1], (double)moments_1b[2], (double)moments_1b[3], (double)moments_1b[4]));
788:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Coarse-graining entropy generation = %e ; Landau entropy generation = %e\n", (double)(moments_1a[3] - moments_0[3]), (double)(moments_1b[3] - moments_0[3])));
789:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "(relative) energy conservation: Coarse-graining = %e ; Landau = %e\n", (double)(moments_1a[2] - moments_0[2]) / (double)moments_0[2], (double)(moments_1b[2] - moments_0[2]) / (double)moments_0[2]));
790:     }
791:     // restore vector
792:     PetscCall(DMCompositeRestoreAccessArray(pack, X, nDMs, NULL, globXArray));
793:     // cleanup
794:     for (PetscInt v_id_0 = 0; v_id_0 < ctx->batch_sz; v_id_0 += numthreads) {
795:       for (int tid = 0; tid < numthreads; tid++) {
796:         const PetscInt v_id = v_id_0 + tid, glb_v_id = global_vertex_id_0 + v_id;
797:         if (glb_v_id < num_vertices) {
798:           for (PetscInt grid = 0; grid < ctx->num_grids; grid++) {
799:             PetscCall(DMDestroy(&globSwarmArray[LAND_PACK_IDX(v_id, grid)]));
800:             PetscCall(MatDestroy(&globMpArray[LAND_PACK_IDX(v_id, grid)]));
801:           }
802:         }
803:       }
804:     }
805:   } // outer (spatial vertex) loop
806:   /* Cleanup */
807:   PetscCall(PetscFree(globXArray));
808:   PetscCall(PetscFree(globSwarmArray));
809:   PetscCall(PetscFree(globMpArray));
810:   PetscCall(PetscFree(printCtx));
811:   // clean up mass matrices
 812:   for (PetscInt grid = 0; grid < ctx->num_grids; grid++) { // per-grid cleanup
813:     PetscCall(MatDestroy(&g_Mass[grid]));
814:     for (int tid = 0; tid < numthreads; tid++) {
815:       PetscCall(VecDestroy(&t_fhat[grid][tid]));
816:       PetscCall(KSPDestroy(&t_ksp[grid][tid]));
817:     }
818:   }
819:   PetscFunctionReturn(PETSC_SUCCESS);
820: }
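
     // main: command-line driver. Example-specific options (see the PetscOptions calls below
     // and in go()): -dim, -number_spatial_vertices, -number_particles_per_dimension,
     // -use_uniform_particle_grid, -vertex_view_target, -grid_view_target, -e_shift, and
     // -print_entropy; everything else is standard PETSc/Landau options (see the TEST block).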

822: int main(int argc, char **argv)
823: {
824:   DM         pack;
825:   Vec        X;
826:   PetscInt   dim = 2, num_vertices = 1, Np = 10, v_target = 0, g_target = 0;
827:   TS         ts;
828:   Mat        J;
829:   LandauCtx *ctx;
830:   PetscReal  shift                     = 0;
831:   PetscBool  use_uniform_particle_grid = PETSC_TRUE;

833:   PetscFunctionBeginUser;
834:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
835:   // process args
836:   PetscOptionsBegin(PETSC_COMM_SELF, "", "Collision Options", "DMPLEX");
837:   PetscCall(PetscOptionsInt("-dim", "Velocity space dimension", "ex30.c", dim, &dim, NULL));
838:   PetscCall(PetscOptionsInt("-number_spatial_vertices", "Number of user spatial vertices to be batched for Landau", "ex30.c", num_vertices, &num_vertices, NULL));
839:   PetscCall(PetscOptionsInt("-number_particles_per_dimension", "Number of particles per grid, with slight modification per spatial vertex, in each dimension of base Cartesian grid", "ex30.c", Np, &Np, NULL));
840:   PetscCall(PetscOptionsBool("-use_uniform_particle_grid", "Use uniform particle grid", "ex30.c", use_uniform_particle_grid, &use_uniform_particle_grid, NULL));
841:   PetscCall(PetscOptionsRangeInt("-vertex_view_target", "Global vertex for diagnostics", "ex30.c", v_target, &v_target, NULL, 0, num_vertices - 1));
842:   PetscCall(PetscOptionsReal("-e_shift", "Bim-Maxwellian shift", "ex30.c", shift, &shift, NULL));
843:   PetscCall(PetscOptionsInt("-grid_view_target", "Grid to view with diagnostics", "ex30.c", g_target, &g_target, NULL));
844:   PetscOptionsEnd();
845:   /* Create a mesh */
846:   PetscCall(DMPlexLandauCreateVelocitySpace(PETSC_COMM_SELF, dim, "", &X, &J, &pack));
847:   PetscCall(DMSetUp(pack));
848:   PetscCall(DMSetOutputSequenceNumber(pack, 0, 0.0));
849:   PetscCall(DMGetApplicationContext(pack, &ctx));
850:   PetscCheck(g_target < ctx->num_grids, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Grid to view %" PetscInt_FMT " should be < number of grids %" PetscInt_FMT, g_target, ctx->num_grids);
 851:   PetscCheck(ctx->batch_view_idx == v_target % ctx->batch_sz, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Global view index %" PetscInt_FMT " mod batch size %" PetscInt_FMT " != ctx->batch_view_idx %" PetscInt_FMT, v_target, ctx->batch_sz, ctx->batch_view_idx);
852:   /* Create timestepping solver context */
853:   PetscCall(TSCreate(PETSC_COMM_SELF, &ts));
854:   PetscCall(TSSetDM(ts, pack));
855:   PetscCall(TSSetIFunction(ts, NULL, DMPlexLandauIFunction, NULL));
856:   PetscCall(TSSetIJacobian(ts, J, J, DMPlexLandauIJacobian, NULL));
857:   PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
858:   PetscCall(TSSetFromOptions(ts));
859:   PetscCall(PetscObjectSetName((PetscObject)X, "X"));
860:   // do particle advance
861:   PetscCall(go(ts, X, num_vertices, Np, dim, v_target, g_target, shift, use_uniform_particle_grid));
862:   PetscCall(MatZeroEntries(J)); // need to zero out so as to not reuse it in Landau's logic
863:   /* clean up */
864:   PetscCall(DMPlexLandauDestroyVelocitySpace(&pack));
865:   PetscCall(TSDestroy(&ts));
866:   PetscCall(VecDestroy(&X));
867:   PetscCall(PetscFinalize());
868:   return 0;
869: }

871: /*TEST

873:   build:
874:     requires: !complex

876:   testset:
877:     requires: double defined(PETSC_USE_DMLANDAU_2D)
878:     output_file: output/ex30_0.out
879:     args: -dim 2 -petscspace_degree 3 -dm_landau_num_species_grid 1,1,1 -dm_refine 1 -number_particles_per_dimension 10 -dm_plex_hash_location \
880:           -dm_landau_batch_size 4 -number_spatial_vertices 6 -vertex_view_target 5 -grid_view_target 1 -dm_landau_batch_view_idx 1 \
881:           -dm_landau_n 1.000018,1,1e-6 -dm_landau_thermal_temps 2,1,1 -dm_landau_ion_masses 2,180 -dm_landau_ion_charges 1,18 \
882:           -ftop_ksp_rtol 1e-10 -ftop_ksp_type lsqr -ftop_pc_type bjacobi -ftop_sub_pc_factor_shift_type nonzero -ftop_sub_pc_type lu \
883:           -ksp_type preonly -pc_type lu -dm_landau_verbose 4 -print_entropy \
884:           -ptof_ksp_type cg -ptof_pc_type jacobi -ptof_ksp_rtol 1e-12\
885:           -snes_converged_reason -snes_monitor -snes_rtol 1e-14 -snes_stol 1e-14 \
886:           -ts_dt 0.01 -ts_rtol 1e-1 -ts_exact_final_time stepover -ts_max_snes_failures -1 -ts_max_steps 1 -ts_monitor -ts_type beuler

888:     test:
889:       suffix: cpu
890:       args: -dm_landau_device_type cpu
891:     test:
892:       suffix: kokkos
893:       # failed on Sunspot@ALCF with sycl
894:       requires: kokkos_kernels !openmp !sycl
895:       args: -dm_landau_device_type kokkos -dm_mat_type aijkokkos -dm_vec_type kokkos -pc_type bjkokkos -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi

897:   testset:
898:     requires: double !defined(PETSC_USE_DMLANDAU_2D)
899:     output_file: output/ex30_3d.out
900:     args: -dim 3 -petscspace_degree 2 -dm_landau_num_species_grid 1,1,1 -dm_refine 0 -number_particles_per_dimension 10 -dm_plex_hash_location \
901:           -dm_landau_batch_size 1 -number_spatial_vertices 1 -vertex_view_target 0 -grid_view_target 0 -dm_landau_batch_view_idx 0 \
902:           -dm_landau_n 1.000018,1,1e-6 -dm_landau_thermal_temps 2,1,1 -dm_landau_ion_masses 2,180 -dm_landau_ion_charges 1,18 \
903:           -ftop_ksp_rtol 1e-12 -ksp_type preonly -pc_type lu \
904:           -ptof_ksp_type cg -ptof_pc_type jacobi -ptof_ksp_rtol 1e-12\
905:           -snes_converged_reason -snes_monitor -snes_rtol 1e-12 -snes_stol 1e-12\
906:           -ts_dt 0.1 -ts_exact_final_time stepover -ts_max_snes_failures -1 -ts_max_steps 1 -ts_monitor -ts_type beuler -print_entropy

908:     test:
909:       suffix: cpu_3d
910:       args: -dm_landau_device_type cpu
911:     test:
912:       suffix: kokkos_3d
913:       requires: kokkos_kernels !openmp
914:       args: -dm_landau_device_type kokkos -dm_mat_type aijkokkos -dm_vec_type kokkos -pc_type bjkokkos -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi

916:   testset:
917:     requires: !complex double defined(PETSC_USE_DMLANDAU_2D) !cuda
918:     args: -dm_refine 1 -dm_landau_num_species_grid 1 -dm_landau_thermal_temps 1 -petscspace_degree 3 -snes_converged_reason -ts_type beuler -ts_dt .01 \
919:           -ts_max_steps 1 -ksp_type preonly -pc_type lu -snes_rtol 1e-12 -snes_stol 1e-12 -dm_landau_device_type cpu -number_particles_per_dimension 40 \
920:           -ptof_ksp_rtol 1e-12 -dm_landau_batch_size 4 -number_spatial_vertices 4 -grid_view_target 0 \
921:           -vertex_view_target 3 -dm_landau_batch_view_idx 3
922:     test:
923:       suffix: simple
924:       args: -ex30_dm_view
925:     test:
926:       requires: cgns
927:       suffix: cgns
928:       args: -ex30_vec_view cgns:cgnsDi.cgns
929:     test:
930:       suffix: normal
931:       args: -ex30_dm_view -use_uniform_particle_grid false

933: TEST*/