39 use num_types,
only: rp
40 use math,
only: subcol3, rzero
41 use space,
only: space_t
42 use field,
only: field_t
43 use coefs,
only: coef_t
44 use scratch_registry,
only: neko_scratch_registry
45 use neko_config,
only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
46 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
47 use operators,
only: opgrad, conv1, cdtp
48 use device_math,
only: device_vdot3, device_sub2, device_add4, &
49 device_col3, device_subcol3, device_rzero
50 use device,
only: device_free, device_map, device_get_ptr
51 use,
intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
58 real(kind=rp),
allocatable :: temp(:)
59 type(c_ptr) :: temp_d = c_null_ptr
64 procedure, pass(this) :: compute_linear => linear_advection_no_dealias
70 procedure, pass(this) :: compute_adjoint => adjoint_advection_no_dealias
75 procedure, pass(this) :: compute_adjoint_scalar => &
76 compute_adjoint_scalar_advection_no_dealias
84 procedure, pass(this) :: free => free_no_dealias
94 type(coef_t),
intent(in) :: coef
96 allocate(this%temp(coef%dof%size()))
98 if (neko_bcknd_device .eq. 1)
then
99 call device_map(this%temp, this%temp_d, coef%dof%size())
105 subroutine free_no_dealias(this)
108 if (
allocated(this%temp))
then
109 deallocate(this%temp)
111 if (c_associated(this%temp_d))
then
112 call device_free(this%temp_d)
114 end subroutine free_no_dealias
133 subroutine adjoint_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
137 type(space_t),
intent(inout) :: Xh
138 type(coef_t),
intent(inout) :: coef
139 type(field_t),
intent(inout) :: vx, vy, vz
140 type(field_t),
intent(inout) :: vxb, vyb, vzb
141 integer,
intent(in) :: n
142 type(field_t),
intent(inout) :: fx, fy, fz
143 type(c_ptr) :: fx_d, fy_d, fz_d
144 type(c_ptr) :: vx_d, vy_d, vz_d
147 real(kind=rp),
dimension(Xh%lxyz) :: duxb, dvxb, dwxb
148 real(kind=rp),
dimension(Xh%lxyz) :: duyb, dvyb, dwyb
149 real(kind=rp),
dimension(Xh%lxyz) :: duzb, dvzb, dwzb
151 integer :: e, i, idx, idxx
154 type(field_t),
pointer :: tduxb, tdvxb, tdwxb, tduyb, tdvyb, tdwyb, tduzb, &
156 integer :: temp_indices(9)
159 if (neko_bcknd_device .eq. 1)
then
160 call neko_scratch_registry%request_field(tduxb, temp_indices(1), .false.)
161 call neko_scratch_registry%request_field(tdvxb, temp_indices(2), .false.)
162 call neko_scratch_registry%request_field(tdwxb, temp_indices(3), .false.)
163 call neko_scratch_registry%request_field(tduyb, temp_indices(4), .false.)
164 call neko_scratch_registry%request_field(tdvyb, temp_indices(5), .false.)
165 call neko_scratch_registry%request_field(tdwyb, temp_indices(6), .false.)
166 call neko_scratch_registry%request_field(tduzb, temp_indices(7), .false.)
167 call neko_scratch_registry%request_field(tdvzb, temp_indices(8), .false.)
168 call neko_scratch_registry%request_field(tdwzb, temp_indices(9), .false.)
179 call opgrad(tduxb%x, tduyb%x, tduzb%x, vxb%x, coef)
180 call opgrad(tdvxb%x, tdvyb%x, tdvzb%x, vyb%x, coef)
181 call opgrad(tdwxb%x, tdwyb%x, tdwzb%x, vzb%x, coef)
185 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
186 tduxb%x_d, tdvxb%x_d, tdwxb%x_d, n)
187 call device_sub2(fx_d, this%temp_d, n)
189 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
190 tduyb%x_d, tdvyb%x_d, tdwyb%x_d, n)
191 call device_sub2(fy_d, this%temp_d, n)
193 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
194 tduzb%x_d, tdvzb%x_d, tdwzb%x_d, n)
195 call device_sub2(fz_d, this%temp_d, n)
199 associate(w1 => tduxb, w2 => tdvxb, w3 => tdwxb, &
200 w4 => tduyb, w5 => tdvyb, w6 => tdwyb)
201 call adjoint_weak_no_dealias_device(fx_d, vx_d, &
202 vxb%x, vyb%x, vzb%x, &
204 w1, w2, w3, w4, w5, w6)
206 call adjoint_weak_no_dealias_device(fy_d, vy_d, &
207 vxb%x, vyb%x, vzb%x, &
209 w1, w2, w3, w4, w5, w6)
211 call adjoint_weak_no_dealias_device(fz_d, vz_d, &
212 vxb%x, vyb%x, vzb%x, &
214 w1, w2, w3, w4, w5, w6)
217 call neko_scratch_registry%relinquish_field(temp_indices)
219 do e = 1, coef%msh%nelv
222 call opgrad(duxb, duyb, duzb, vxb%x, coef, e, e)
223 call opgrad(dvxb, dvyb, dvzb, vyb%x, coef, e, e)
224 call opgrad(dwxb, dwyb, dwzb, vzb%x, coef, e, e)
227 idx = (e - 1)*xh%lxyz + 1
230 fx%x(idxx, 1, 1, 1) = fx%x(idxx, 1, 1, 1) - ( &
231 vx%x(i,1,1,e)*duxb(i) + &
232 vy%x(i,1,1,e)*dvxb(i) + &
233 vz%x(i,1,1,e)*dwxb(i) )
235 fy%x(idxx, 1, 1, 1) = fy%x(idxx, 1, 1, 1) - ( &
236 vx%x(i,1,1,e)*duyb(i) + &
237 vy%x(i,1,1,e)*dvyb(i) + &
238 vz%x(i,1,1,e)*dwyb(i))
240 fz%x(idxx, 1, 1, 1) = fz%x(idxx, 1, 1, 1) - ( &
241 vx%x(i,1,1,e)*duzb(i) + &
242 vy%x(i,1,1,e)*dvzb(i) + &
243 vz%x(i,1,1,e)*dwzb(i))
249 associate(w1 => duxb, w2 => dvxb, w3 => dwxb, &
250 w4 => duyb, w5 => dvyb, w6 => dwyb)
251 call adjoint_weak_no_dealias_cpu( &
252 fx%x(:,:,:,e), vx%x(1,1,1,e), &
253 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
254 e, coef, xh, xh%lxyz, &
255 w1, w2, w3, w4, w5, w6)
257 call adjoint_weak_no_dealias_cpu( &
258 fy%x(:,:,:,e), vy%x(1,1,1,e), &
259 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
260 e, coef, xh, xh%lxyz, &
261 w1, w2, w3, w4, w5, w6)
263 call adjoint_weak_no_dealias_cpu( &
264 fz%x(:,:,:,e), vz%x(1,1,1,e), &
265 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
266 e, coef, xh, xh%lxyz, &
267 w1, w2, w3, w4, w5, w6)
273 end subroutine adjoint_advection_no_dealias
287 subroutine adjoint_weak_no_dealias_device(f_d, u_i_d, ub, vb, wb, coef, Xh, &
288 n, work1, work2, work3, w1, w2, w3)
289 integer,
intent(in) :: n
290 type(c_ptr),
intent(inout) :: f_d
291 type(c_ptr),
intent(in) :: u_i_d
292 real(kind=rp),
intent(inout),
dimension(n) :: ub, vb, wb
293 type(field_t),
intent(inout) :: w1, w2, w3
294 type(field_t),
intent(inout) :: work1, work2, work3
295 type(space_t),
intent(inout) :: Xh
296 type(coef_t),
intent(inout) :: coef
297 type(c_ptr) :: ub_d, vb_d, wb_d
298 type(c_ptr) :: work1_d, work2_d, work3_d, w1_d, w2_d, w3_d
306 ub_d = device_get_ptr(ub)
307 vb_d = device_get_ptr(vb)
308 wb_d = device_get_ptr(wb)
311 call device_col3(work1_d, u_i_d, ub_d, n)
312 call device_col3(work2_d, u_i_d, vb_d, n)
313 call device_col3(work3_d, u_i_d, wb_d, n)
315 call cdtp(w1%x, work1%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
316 call cdtp(w2%x, work2%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
317 call cdtp(w3%x, work3%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
319 call device_add4(work1_d, w1_d, w2_d, w3_d , n)
320 call device_sub2(f_d, work1_d, n)
321 end subroutine adjoint_weak_no_dealias_device
335 subroutine adjoint_weak_no_dealias_cpu(f, u_i, ub, vb, wb, e, coef, Xh, n, &
336 work1, work2, work3, w1, w2, w3)
338 integer,
intent(in) :: e, n
340 real(kind=rp),
intent(inout),
dimension(n) :: f
341 real(kind=rp),
intent(inout),
dimension(n) :: u_i
342 real(kind=rp),
intent(inout),
dimension(n) :: ub, vb, wb
343 real(kind=rp),
intent(inout),
dimension(n) :: w1, w2, w3
344 real(kind=rp),
intent(inout),
dimension(n) :: work1, work2, work3
345 type(space_t),
intent(inout) :: xh
346 type(coef_t),
intent(inout) :: coef
350 work1(i) = u_i(i)*ub(i)
351 work2(i) = u_i(i)*vb(i)
352 work3(i) = u_i(i)*wb(i)
356 call cdtp(w1, work1, coef%drdx, coef%dsdx, coef%dtdx, coef, e ,e)
357 call cdtp(w2, work2, coef%drdy, coef%dsdy, coef%dtdy, coef, e ,e)
358 call cdtp(w3, work3, coef%drdz, coef%dsdz, coef%dtdz, coef, e, e)
362 f(i) = f(i) - w1(i) + w2(i) + w3(i)
364 end subroutine adjoint_weak_no_dealias_cpu
384 subroutine linear_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
388 type(space_t),
intent(inout) :: Xh
389 type(coef_t),
intent(inout) :: coef
390 type(field_t),
intent(inout) :: vx, vy, vz
391 type(field_t),
intent(inout) :: vxb, vyb, vzb
392 integer,
intent(in) :: n
393 type(field_t),
intent(inout) :: fx, fy, fz
394 type(c_ptr) :: fx_d, fy_d, fz_d
396 if (neko_bcknd_device .eq. 1)
then
402 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
403 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
404 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
405 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
408 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
409 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
410 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
411 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
414 if (coef%Xh%lz .eq. 1)
then
415 call device_rzero (this%temp_d, n)
417 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
418 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
419 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
420 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
424 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
425 call subcol3 (fx%x, coef%B, this%temp, n)
426 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
427 call subcol3 (fx%x, coef%B, this%temp, n)
430 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
431 call subcol3 (fy%x, coef%B, this%temp, n)
432 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
433 call subcol3 (fy%x, coef%B, this%temp, n)
436 if (coef%Xh%lz .eq. 1)
then
437 call rzero (this%temp, n)
439 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
440 call subcol3(fz%x, coef%B, this%temp, n)
441 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
442 call subcol3(fz%x, coef%B, this%temp, n)
446 end subroutine linear_advection_no_dealias
462 subroutine compute_adjoint_scalar_advection_no_dealias(this, vxb, vyb, vzb, &
463 s, fs, Xh, coef, n, dt)
465 type(field_t),
intent(inout) :: vxb, vyb, vzb
466 type(field_t),
intent(inout) :: s
467 type(field_t),
intent(inout) :: fs
468 type(space_t),
intent(inout) :: Xh
469 type(coef_t),
intent(inout) :: coef
470 integer,
intent(in) :: n
471 real(kind=rp),
intent(in),
optional :: dt
472 real(kind=rp),
dimension(Xh%lxyz) :: w1, w2, w3, w4, w5, w6
474 type(field_t),
pointer :: w1_d, w2_d, w3_d, w4_d, w5_d, w6_d
475 integer :: temp_indices(6)
477 if (neko_bcknd_device .eq. 1)
then
478 call neko_scratch_registry%request_field(w1_d, temp_indices(1), .false.)
479 call neko_scratch_registry%request_field(w2_d, temp_indices(2), .false.)
480 call neko_scratch_registry%request_field(w3_d, temp_indices(3), .false.)
481 call neko_scratch_registry%request_field(w4_d, temp_indices(4), .false.)
482 call neko_scratch_registry%request_field(w5_d, temp_indices(5), .false.)
483 call neko_scratch_registry%request_field(w6_d, temp_indices(6), .false.)
485 call adjoint_weak_no_dealias_device(fs%x_d, s%x_d, &
486 vxb%x, vyb%x, vzb%x, &
488 w1_d, w2_d, w3_d, w4_d, w5_d, w6_d)
490 call neko_scratch_registry%relinquish_field(temp_indices)
493 do e = 1, coef%msh%nelv
496 call adjoint_weak_no_dealias_cpu( &
497 fs%x(:,:,:,e), s%x(1,1,1,e), &
498 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
499 e, coef, xh, xh%lxyz, &
500 w1, w2, w3, w4, w5, w6)
504 end subroutine compute_adjoint_scalar_advection_no_dealias
Subroutines to add perturbed advection terms to the RHS of a transport equation.
subroutine init_no_dealias(this, coef)
Constructor.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with no dealiasing applied.
Base abstract type for computing the advection operator.