37 use num_types,
only: rp
38 use math,
only: subcol3, rzero
39 use space,
only: space_t
40 use field,
only: field_t
41 use coefs,
only: coef_t
42 use scratch_registry,
only: neko_scratch_registry
43 use neko_config,
only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
44 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
45 use operators,
only: opgrad, conv1, cdtp
46 use device_math,
only: device_vdot3, device_sub2, device_add4, &
47 device_col3, device_subcol3, device_rzero
48 use device,
only: device_free, device_map, device_get_ptr
49 use,
intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
56 real(kind=rp),
allocatable :: temp(:)
57 type(c_ptr) :: temp_d = c_null_ptr
62 procedure, pass(this) :: compute_linear => linear_advection_no_dealias
68 procedure, pass(this) :: compute_adjoint => adjoint_advection_no_dealias
73 procedure, pass(this) :: compute_adjoint_scalar => &
74 compute_adjoint_scalar_advection_no_dealias
80 procedure, pass(this) :: init => init_no_dealias
82 procedure, pass(this) :: free => free_no_dealias
90 subroutine init_no_dealias(this, coef)
92 type(coef_t),
intent(in) :: coef
94 allocate(this%temp(coef%dof%size()))
96 if (neko_bcknd_device .eq. 1)
then
97 call device_map(this%temp, this%temp_d, coef%dof%size())
100 end subroutine init_no_dealias
103 subroutine free_no_dealias(this)
106 if (
allocated(this%temp))
then
107 deallocate(this%temp)
109 if (c_associated(this%temp_d))
then
110 call device_free(this%temp_d)
112 end subroutine free_no_dealias
131 subroutine adjoint_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
135 type(space_t),
intent(inout) :: Xh
136 type(coef_t),
intent(inout) :: coef
137 type(field_t),
intent(inout) :: vx, vy, vz
138 type(field_t),
intent(inout) :: vxb, vyb, vzb
139 integer,
intent(in) :: n
140 type(field_t),
intent(inout) :: fx, fy, fz
141 type(c_ptr) :: fx_d, fy_d, fz_d
142 type(c_ptr) :: vx_d, vy_d, vz_d
145 real(kind=rp),
dimension(Xh%lxyz) :: duxb, dvxb, dwxb
146 real(kind=rp),
dimension(Xh%lxyz) :: duyb, dvyb, dwyb
147 real(kind=rp),
dimension(Xh%lxyz) :: duzb, dvzb, dwzb
149 integer :: e, i, idx, idxx
152 type(field_t),
pointer :: tduxb, tdvxb, tdwxb, tduyb, tdvyb, tdwyb, tduzb, &
154 integer :: temp_indices(9)
157 if (neko_bcknd_device .eq. 1)
then
158 call neko_scratch_registry%request_field(tduxb, temp_indices(1))
159 call neko_scratch_registry%request_field(tdvxb, temp_indices(2))
160 call neko_scratch_registry%request_field(tdwxb, temp_indices(3))
161 call neko_scratch_registry%request_field(tduyb, temp_indices(4))
162 call neko_scratch_registry%request_field(tdvyb, temp_indices(5))
163 call neko_scratch_registry%request_field(tdwyb, temp_indices(6))
164 call neko_scratch_registry%request_field(tduzb, temp_indices(7))
165 call neko_scratch_registry%request_field(tdvzb, temp_indices(8))
166 call neko_scratch_registry%request_field(tdwzb, temp_indices(9))
177 call opgrad(tduxb%x, tduyb%x, tduzb%x, vxb%x, coef)
178 call opgrad(tdvxb%x, tdvyb%x, tdvzb%x, vyb%x, coef)
179 call opgrad(tdwxb%x, tdwyb%x, tdwzb%x, vzb%x, coef)
183 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
184 tduxb%x_d, tdvxb%x_d, tdwxb%x_d, n)
185 call device_sub2(fx_d, this%temp_d, n)
187 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
188 tduyb%x_d, tdvyb%x_d, tdwyb%x_d, n)
189 call device_sub2(fy_d, this%temp_d, n)
191 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
192 tduzb%x_d, tdvzb%x_d, tdwzb%x_d, n)
193 call device_sub2(fz_d, this%temp_d, n)
197 associate(w1 => tduxb, w2 => tdvxb, w3 => tdwxb, &
198 w4 => tduyb, w5 => tdvyb, w6 => tdwyb)
199 call adjoint_weak_no_dealias_device(fx_d, vx_d, &
200 vxb%x, vyb%x, vzb%x, &
202 w1, w2, w3, w4, w5, w6)
204 call adjoint_weak_no_dealias_device(fy_d, vy_d, &
205 vxb%x, vyb%x, vzb%x, &
207 w1, w2, w3, w4, w5, w6)
209 call adjoint_weak_no_dealias_device(fz_d, vz_d, &
210 vxb%x, vyb%x, vzb%x, &
212 w1, w2, w3, w4, w5, w6)
215 call neko_scratch_registry%relinquish_field(temp_indices)
217 do e = 1, coef%msh%nelv
220 call opgrad(duxb, duyb, duzb, vxb%x, coef, e, e)
221 call opgrad(dvxb, dvyb, dvzb, vyb%x, coef, e, e)
222 call opgrad(dwxb, dwyb, dwzb, vzb%x, coef, e, e)
225 idx = (e - 1)*xh%lxyz + 1
228 fx%x(idxx, 1, 1, 1) = fx%x(idxx, 1, 1, 1) - ( &
229 vx%x(i,1,1,e)*duxb(i) + &
230 vy%x(i,1,1,e)*dvxb(i) + &
231 vz%x(i,1,1,e)*dwxb(i) )
233 fy%x(idxx, 1, 1, 1) = fy%x(idxx, 1, 1, 1) - ( &
234 vx%x(i,1,1,e)*duyb(i) + &
235 vy%x(i,1,1,e)*dvyb(i) + &
236 vz%x(i,1,1,e)*dwyb(i))
238 fz%x(idxx, 1, 1, 1) = fz%x(idxx, 1, 1, 1) - ( &
239 vx%x(i,1,1,e)*duzb(i) + &
240 vy%x(i,1,1,e)*dvzb(i) + &
241 vz%x(i,1,1,e)*dwzb(i))
247 associate(w1 => duxb, w2 => dvxb, w3 => dwxb, &
248 w4 => duyb, w5 => dvyb, w6 => dwyb)
249 call adjoint_weak_no_dealias_cpu( &
250 fx%x(:,:,:,e), vx%x(1,1,1,e), &
251 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
252 e, coef, xh, xh%lxyz, &
253 w1, w2, w3, w4, w5, w6)
255 call adjoint_weak_no_dealias_cpu( &
256 fy%x(:,:,:,e), vy%x(1,1,1,e), &
257 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
258 e, coef, xh, xh%lxyz, &
259 w1, w2, w3, w4, w5, w6)
261 call adjoint_weak_no_dealias_cpu( &
262 fz%x(:,:,:,e), vz%x(1,1,1,e), &
263 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
264 e, coef, xh, xh%lxyz, &
265 w1, w2, w3, w4, w5, w6)
271 end subroutine adjoint_advection_no_dealias
285 subroutine adjoint_weak_no_dealias_device(f_d, u_i_d, ub, vb, wb, coef, Xh, &
286 n, work1, work2, work3, w1, w2, w3)
287 integer,
intent(in) :: n
288 type(c_ptr),
intent(inout) :: f_d
289 type(c_ptr),
intent(in) :: u_i_d
290 real(kind=rp),
intent(inout),
dimension(n) :: ub, vb, wb
291 type(field_t),
intent(inout) :: w1, w2, w3
292 type(field_t),
intent(inout) :: work1, work2, work3
293 type(space_t),
intent(inout) :: Xh
294 type(coef_t),
intent(inout) :: coef
295 type(c_ptr) :: ub_d, vb_d, wb_d
296 type(c_ptr) :: work1_d, work2_d, work3_d, w1_d, w2_d, w3_d
304 ub_d = device_get_ptr(ub)
305 vb_d = device_get_ptr(vb)
306 wb_d = device_get_ptr(wb)
309 call device_col3(work1_d, u_i_d, ub_d, n)
310 call device_col3(work2_d, u_i_d, vb_d, n)
311 call device_col3(work3_d, u_i_d, wb_d, n)
313 call cdtp(w1%x, work1%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
314 call cdtp(w2%x, work2%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
315 call cdtp(w3%x, work3%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
317 call device_add4(work1_d, w1_d, w2_d, w3_d , n)
318 call device_sub2(f_d, work1_d, n)
319 end subroutine adjoint_weak_no_dealias_device
333 subroutine adjoint_weak_no_dealias_cpu(f, u_i, ub, vb, wb, e, coef, Xh, n, &
334 work1, work2, work3, w1, w2, w3)
336 integer,
intent(in) :: e, n
338 real(kind=rp),
intent(inout),
dimension(n) :: f
339 real(kind=rp),
intent(inout),
dimension(n) :: u_i
340 real(kind=rp),
intent(inout),
dimension(n) :: ub, vb, wb
341 real(kind=rp),
intent(inout),
dimension(n) :: w1, w2, w3
342 real(kind=rp),
intent(inout),
dimension(n) :: work1, work2, work3
343 type(space_t),
intent(inout) :: xh
344 type(coef_t),
intent(inout) :: coef
348 work1(i) = u_i(i)*ub(i)
349 work2(i) = u_i(i)*vb(i)
350 work3(i) = u_i(i)*wb(i)
354 call cdtp(w1, work1, coef%drdx, coef%dsdx, coef%dtdx, coef, e ,e)
355 call cdtp(w2, work2, coef%drdy, coef%dsdy, coef%dtdy, coef, e ,e)
356 call cdtp(w3, work3, coef%drdz, coef%dsdz, coef%dtdz, coef, e, e)
360 f(i) = f(i) - w1(i) + w2(i) + w3(i)
362 end subroutine adjoint_weak_no_dealias_cpu
382 subroutine linear_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
386 type(space_t),
intent(inout) :: Xh
387 type(coef_t),
intent(inout) :: coef
388 type(field_t),
intent(inout) :: vx, vy, vz
389 type(field_t),
intent(inout) :: vxb, vyb, vzb
390 integer,
intent(in) :: n
391 type(field_t),
intent(inout) :: fx, fy, fz
392 type(c_ptr) :: fx_d, fy_d, fz_d
394 if (neko_bcknd_device .eq. 1)
then
400 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
401 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
402 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
403 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
406 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
407 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
408 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
409 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
412 if (coef%Xh%lz .eq. 1)
then
413 call device_rzero (this%temp_d, n)
415 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
416 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
417 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
418 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
422 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
423 call subcol3 (fx%x, coef%B, this%temp, n)
424 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
425 call subcol3 (fx%x, coef%B, this%temp, n)
428 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
429 call subcol3 (fy%x, coef%B, this%temp, n)
430 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
431 call subcol3 (fy%x, coef%B, this%temp, n)
434 if (coef%Xh%lz .eq. 1)
then
435 call rzero (this%temp, n)
437 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
438 call subcol3(fz%x, coef%B, this%temp, n)
439 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
440 call subcol3(fz%x, coef%B, this%temp, n)
444 end subroutine linear_advection_no_dealias
460 subroutine compute_adjoint_scalar_advection_no_dealias(this, vxb, vyb, vzb, &
461 s, fs, Xh, coef, n, dt)
463 type(field_t),
intent(inout) :: vxb, vyb, vzb
464 type(field_t),
intent(inout) :: s
465 type(field_t),
intent(inout) :: fs
466 type(space_t),
intent(inout) :: Xh
467 type(coef_t),
intent(inout) :: coef
468 integer,
intent(in) :: n
469 real(kind=rp),
intent(in),
optional :: dt
470 real(kind=rp),
dimension(Xh%lxyz) :: w1, w2, w3, w4, w5, w6
472 type(field_t),
pointer :: w1_d, w2_d, w3_d, w4_d, w5_d, w6_d
473 integer :: temp_indices(6)
475 if (neko_bcknd_device .eq. 1)
then
476 call neko_scratch_registry%request_field(w1_d, temp_indices(1))
477 call neko_scratch_registry%request_field(w2_d, temp_indices(2))
478 call neko_scratch_registry%request_field(w3_d, temp_indices(3))
479 call neko_scratch_registry%request_field(w4_d, temp_indices(4))
480 call neko_scratch_registry%request_field(w5_d, temp_indices(5))
481 call neko_scratch_registry%request_field(w6_d, temp_indices(6))
483 call adjoint_weak_no_dealias_device(fs%x_d, s%x_d, &
484 vxb%x, vyb%x, vzb%x, &
486 w1_d, w2_d, w3_d, w4_d, w5_d, w6_d)
488 call neko_scratch_registry%relinquish_field(temp_indices)
491 do e = 1, coef%msh%nelv
494 call adjoint_weak_no_dealias_cpu( &
495 fs%x(:,:,:,e), s%x(1,1,1,e), &
496 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
497 e, coef, xh, xh%lxyz, &
498 w1, w2, w3, w4, w5, w6)
502 end subroutine compute_adjoint_scalar_advection_no_dealias
Subroutines to add perturbed advection terms to the RHS of a transport equation.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with no dealiasing applied.
Base abstract type for computing the advection operator.