Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
adv_adjoint_no_dealias.f90
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only: rp
38 use math, only: subcol3, rzero
39 use space, only: space_t
40 use field, only: field_t
41 use coefs, only: coef_t
42 use scratch_registry, only: neko_scratch_registry
43 use neko_config, only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
44 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
45 use operators, only: opgrad, conv1, cdtp
46 use device_math, only: device_vdot3, device_sub2, device_add4, &
47 device_col3, device_subcol3, device_rzero
48 use device, only: device_free, device_map, device_get_ptr
49 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
50 c_associated
51 implicit none
52 private
53
55 type, public, extends(advection_adjoint_t) :: adv_lin_no_dealias_t
56 real(kind=rp), allocatable :: temp(:)
57 type(c_ptr) :: temp_d = c_null_ptr
58 contains
62 procedure, pass(this) :: compute_linear => linear_advection_no_dealias
68 procedure, pass(this) :: compute_adjoint => adjoint_advection_no_dealias
70 ! If one integrates by parts, this essentially switches sign and adds some
71 ! boundary terms.
72 ! We keep the differential operator on the test function
73 procedure, pass(this) :: compute_adjoint_scalar => &
74 compute_adjoint_scalar_advection_no_dealias
75 ! NOTE
76 ! This linearized advection term is the same as a normal advection term
77 ! so not sure what to do here...
78
80 procedure, pass(this) :: init => init_no_dealias
82 procedure, pass(this) :: free => free_no_dealias
84
85contains
86
90 subroutine init_no_dealias(this, coef)
91 class(adv_lin_no_dealias_t), intent(inout) :: this
92 type(coef_t), intent(in) :: coef
93
94 allocate(this%temp(coef%dof%size()))
95
96 if (neko_bcknd_device .eq. 1) then
97 call device_map(this%temp, this%temp_d, coef%dof%size())
98 end if
99
100 end subroutine init_no_dealias
101
103 subroutine free_no_dealias(this)
104 class(adv_lin_no_dealias_t), intent(inout) :: this
105
106 if (allocated(this%temp)) then
107 deallocate(this%temp)
108 end if
109 if (c_associated(this%temp_d)) then
110 call device_free(this%temp_d)
111 end if
112 end subroutine free_no_dealias
113
131 subroutine adjoint_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
132 fy, fz, Xh, coef, n)
133 implicit none
134 class(adv_lin_no_dealias_t), intent(inout) :: this
135 type(space_t), intent(inout) :: Xh
136 type(coef_t), intent(inout) :: coef
137 type(field_t), intent(inout) :: vx, vy, vz
138 type(field_t), intent(inout) :: vxb, vyb, vzb
139 integer, intent(in) :: n
140 type(field_t), intent(inout) :: fx, fy, fz
141 type(c_ptr) :: fx_d, fy_d, fz_d
142 type(c_ptr) :: vx_d, vy_d, vz_d
143
144 ! these are gradients of U_b (one element)
145 real(kind=rp), dimension(Xh%lxyz) :: duxb, dvxb, dwxb
146 real(kind=rp), dimension(Xh%lxyz) :: duyb, dvyb, dwyb
147 real(kind=rp), dimension(Xh%lxyz) :: duzb, dvzb, dwzb
148 ! temporary arrays
149 integer :: e, i, idx, idxx
150
151
152 type(field_t), pointer :: tduxb, tdvxb, tdwxb, tduyb, tdvyb, tdwyb, tduzb, &
153 tdvzb, tdwzb
154 integer :: temp_indices(9)
155
156
157 if (neko_bcknd_device .eq. 1) then
158 call neko_scratch_registry%request_field(tduxb, temp_indices(1))
159 call neko_scratch_registry%request_field(tdvxb, temp_indices(2))
160 call neko_scratch_registry%request_field(tdwxb, temp_indices(3))
161 call neko_scratch_registry%request_field(tduyb, temp_indices(4))
162 call neko_scratch_registry%request_field(tdvyb, temp_indices(5))
163 call neko_scratch_registry%request_field(tdwyb, temp_indices(6))
164 call neko_scratch_registry%request_field(tduzb, temp_indices(7))
165 call neko_scratch_registry%request_field(tdvzb, temp_indices(8))
166 call neko_scratch_registry%request_field(tdwzb, temp_indices(9))
167 fx_d = fx%x_d
168 fy_d = fy%x_d
169 fz_d = fz%x_d
170 vx_d = vx%x_d
171 vy_d = vy%x_d
172 vz_d = vz%x_d
173
174 ! u . grad U_b^T
175 !-----------------------------
176 ! take all the gradients
177 call opgrad(tduxb%x, tduyb%x, tduzb%x, vxb%x, coef)
178 call opgrad(tdvxb%x, tdvyb%x, tdvzb%x, vyb%x, coef)
179 call opgrad(tdwxb%x, tdwyb%x, tdwzb%x, vzb%x, coef)
180
181
182 ! traspose and multiply
183 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
184 tduxb%x_d, tdvxb%x_d, tdwxb%x_d, n)
185 call device_sub2(fx_d, this%temp_d, n)
186
187 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
188 tduyb%x_d, tdvyb%x_d, tdwyb%x_d, n)
189 call device_sub2(fy_d, this%temp_d, n)
190
191 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
192 tduzb%x_d, tdvzb%x_d, tdwzb%x_d, n)
193 call device_sub2(fz_d, this%temp_d, n)
194
195 ! \int \grad v . U_b ^ u
196 ! with '^' an outer product
197 associate(w1 => tduxb, w2 => tdvxb, w3 => tdwxb, &
198 w4 => tduyb, w5 => tdvyb, w6 => tdwyb)
199 call adjoint_weak_no_dealias_device(fx_d, vx_d, &
200 vxb%x, vyb%x, vzb%x, &
201 coef, xh, n, &
202 w1, w2, w3, w4, w5, w6)
203
204 call adjoint_weak_no_dealias_device(fy_d, vy_d, &
205 vxb%x, vyb%x, vzb%x, &
206 coef, xh, n, &
207 w1, w2, w3, w4, w5, w6)
208
209 call adjoint_weak_no_dealias_device(fz_d, vz_d, &
210 vxb%x, vyb%x, vzb%x, &
211 coef, xh, n, &
212 w1, w2, w3, w4, w5, w6)
213 end associate
214
215 call neko_scratch_registry%relinquish_field(temp_indices)
216 else
217 do e = 1, coef%msh%nelv
218 ! u . grad U_b^T
219 !-----------------------------
220 call opgrad(duxb, duyb, duzb, vxb%x, coef, e, e)
221 call opgrad(dvxb, dvyb, dvzb, vyb%x, coef, e, e)
222 call opgrad(dwxb, dwyb, dwzb, vzb%x, coef, e, e)
223
224 ! traspose and multiply
225 idx = (e - 1)*xh%lxyz + 1
226 do i = 1, xh%lxyz
227 idxx = idx + i
228 fx%x(idxx, 1, 1, 1) = fx%x(idxx, 1, 1, 1) - ( &
229 vx%x(i,1,1,e)*duxb(i) + &
230 vy%x(i,1,1,e)*dvxb(i) + &
231 vz%x(i,1,1,e)*dwxb(i) )
232
233 fy%x(idxx, 1, 1, 1) = fy%x(idxx, 1, 1, 1) - ( &
234 vx%x(i,1,1,e)*duyb(i) + &
235 vy%x(i,1,1,e)*dvyb(i) + &
236 vz%x(i,1,1,e)*dwyb(i))
237
238 fz%x(idxx, 1, 1, 1) = fz%x(idxx, 1, 1, 1) - ( &
239 vx%x(i,1,1,e)*duzb(i) + &
240 vy%x(i,1,1,e)*dvzb(i) + &
241 vz%x(i,1,1,e)*dwzb(i))
242 end do
243
244 ! \int \grad v . U_b ^ u
245 ! with ^ an outer product
246 ! use these as work arrays
247 associate(w1 => duxb, w2 => dvxb, w3 => dwxb, &
248 w4 => duyb, w5 => dvyb, w6 => dwyb)
249 call adjoint_weak_no_dealias_cpu( &
250 fx%x(:,:,:,e), vx%x(1,1,1,e), &
251 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
252 e, coef, xh, xh%lxyz, &
253 w1, w2, w3, w4, w5, w6)
254
255 call adjoint_weak_no_dealias_cpu( &
256 fy%x(:,:,:,e), vy%x(1,1,1,e), &
257 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
258 e, coef, xh, xh%lxyz, &
259 w1, w2, w3, w4, w5, w6)
260
261 call adjoint_weak_no_dealias_cpu( &
262 fz%x(:,:,:,e), vz%x(1,1,1,e), &
263 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
264 e, coef, xh, xh%lxyz, &
265 w1, w2, w3, w4, w5, w6)
266 end associate
267 end do
268
269 end if
270
271 end subroutine adjoint_advection_no_dealias
272
285 subroutine adjoint_weak_no_dealias_device(f_d, u_i_d, ub, vb, wb, coef, Xh, &
286 n, work1, work2, work3, w1, w2, w3)
287 integer, intent(in) :: n
288 type(c_ptr), intent(inout) :: f_d
289 type(c_ptr), intent(in) :: u_i_d
290 real(kind=rp), intent(inout), dimension(n) :: ub, vb, wb
291 type(field_t), intent(inout) :: w1, w2, w3
292 type(field_t), intent(inout) :: work1, work2, work3
293 type(space_t), intent(inout) :: Xh
294 type(coef_t), intent(inout) :: coef
295 type(c_ptr) :: ub_d, vb_d, wb_d
296 type(c_ptr) :: work1_d, work2_d, work3_d, w1_d, w2_d, w3_d
297
298 work1_d = work1%x_d
299 work2_d = work2%x_d
300 work3_d = work3%x_d
301 w1_d = w1%x_d
302 w2_d = w2%x_d
303 w3_d = w3%x_d
304 ub_d = device_get_ptr(ub)
305 vb_d = device_get_ptr(vb)
306 wb_d = device_get_ptr(wb)
307
308 ! outer product
309 call device_col3(work1_d, u_i_d, ub_d, n)
310 call device_col3(work2_d, u_i_d, vb_d, n)
311 call device_col3(work3_d, u_i_d, wb_d, n)
312 ! D^T
313 call cdtp(w1%x, work1%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
314 call cdtp(w2%x, work2%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
315 call cdtp(w3%x, work3%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
316 ! sum them
317 call device_add4(work1_d, w1_d, w2_d, w3_d , n)
318 call device_sub2(f_d, work1_d, n)
319 end subroutine adjoint_weak_no_dealias_device
320
333 subroutine adjoint_weak_no_dealias_cpu(f, u_i, ub, vb, wb, e, coef, Xh, n, &
334 work1, work2, work3, w1, w2, w3)
335 implicit none
336 integer, intent(in) :: e, n
337 integer :: i
338 real(kind=rp), intent(inout), dimension(n) :: f
339 real(kind=rp), intent(inout), dimension(n) :: u_i
340 real(kind=rp), intent(inout), dimension(n) :: ub, vb, wb
341 real(kind=rp), intent(inout), dimension(n) :: w1, w2, w3
342 real(kind=rp), intent(inout), dimension(n) :: work1, work2, work3
343 type(space_t), intent(inout) :: xh
344 type(coef_t), intent(inout) :: coef
345
346 ! outer product
347 do i = 1, xh%lxyz
348 work1(i) = u_i(i)*ub(i)
349 work2(i) = u_i(i)*vb(i)
350 work3(i) = u_i(i)*wb(i)
351 end do
352
353 ! D^T
354 call cdtp(w1, work1, coef%drdx, coef%dsdx, coef%dtdx, coef, e ,e)
355 call cdtp(w2, work2, coef%drdy, coef%dsdy, coef%dtdy, coef, e ,e)
356 call cdtp(w3, work3, coef%drdz, coef%dsdz, coef%dtdz, coef, e, e)
357
358 ! sum them
359 do i = 1, xh%lxyz
360 f(i) = f(i) - w1(i) + w2(i) + w3(i)
361 end do
362 end subroutine adjoint_weak_no_dealias_cpu
363
364
365
382 subroutine linear_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
383 fy, fz, Xh, coef, n)
384 implicit none
385 class(adv_lin_no_dealias_t), intent(inout) :: this
386 type(space_t), intent(inout) :: Xh
387 type(coef_t), intent(inout) :: coef
388 type(field_t), intent(inout) :: vx, vy, vz
389 type(field_t), intent(inout) :: vxb, vyb, vzb
390 integer, intent(in) :: n
391 type(field_t), intent(inout) :: fx, fy, fz
392 type(c_ptr) :: fx_d, fy_d, fz_d
393
394 if (neko_bcknd_device .eq. 1) then
395 fx_d = fx%x_d
396 fy_d = fy%x_d
397 fz_d = fz%x_d
398
399 ! (x)
400 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
401 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
402 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
403 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
404
405 ! (y)
406 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
407 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
408 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
409 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
410
411 ! (z)
412 if (coef%Xh%lz .eq. 1) then
413 call device_rzero (this%temp_d, n)
414 else
415 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
416 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
417 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
418 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
419 end if
420 else
421 ! (x)
422 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
423 call subcol3 (fx%x, coef%B, this%temp, n)
424 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
425 call subcol3 (fx%x, coef%B, this%temp, n)
426
427 ! (y)
428 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
429 call subcol3 (fy%x, coef%B, this%temp, n)
430 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
431 call subcol3 (fy%x, coef%B, this%temp, n)
432
433 ! (z)
434 if (coef%Xh%lz .eq. 1) then
435 call rzero (this%temp, n)
436 else
437 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
438 call subcol3(fz%x, coef%B, this%temp, n)
439 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
440 call subcol3(fz%x, coef%B, this%temp, n)
441 end if
442 end if
443
444 end subroutine linear_advection_no_dealias
445
460 subroutine compute_adjoint_scalar_advection_no_dealias(this, vxb, vyb, vzb, &
461 s, fs, Xh, coef, n, dt)
462 class(adv_lin_no_dealias_t), intent(inout) :: this
463 type(field_t), intent(inout) :: vxb, vyb, vzb
464 type(field_t), intent(inout) :: s
465 type(field_t), intent(inout) :: fs
466 type(space_t), intent(inout) :: Xh
467 type(coef_t), intent(inout) :: coef
468 integer, intent(in) :: n
469 real(kind=rp), intent(in), optional :: dt
470 real(kind=rp), dimension(Xh%lxyz) :: w1, w2, w3, w4, w5, w6
471 integer :: e
472 type(field_t), pointer :: w1_d, w2_d, w3_d, w4_d, w5_d, w6_d
473 integer :: temp_indices(6)
474
475 if (neko_bcknd_device .eq. 1) then
476 call neko_scratch_registry%request_field(w1_d, temp_indices(1))
477 call neko_scratch_registry%request_field(w2_d, temp_indices(2))
478 call neko_scratch_registry%request_field(w3_d, temp_indices(3))
479 call neko_scratch_registry%request_field(w4_d, temp_indices(4))
480 call neko_scratch_registry%request_field(w5_d, temp_indices(5))
481 call neko_scratch_registry%request_field(w6_d, temp_indices(6))
482
483 call adjoint_weak_no_dealias_device(fs%x_d, s%x_d, &
484 vxb%x, vyb%x, vzb%x, &
485 coef, xh, n, &
486 w1_d, w2_d, w3_d, w4_d, w5_d, w6_d)
487
488 call neko_scratch_registry%relinquish_field(temp_indices)
489
490 else
491 do e = 1, coef%msh%nelv
492 ! \int \grad r . U_b s
493 !-----------------------------
494 call adjoint_weak_no_dealias_cpu( &
495 fs%x(:,:,:,e), s%x(1,1,1,e), &
496 vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
497 e, coef, xh, xh%lxyz, &
498 w1, w2, w3, w4, w5, w6)
499 end do
500 end if
501
502 end subroutine compute_adjoint_scalar_advection_no_dealias
503
504end module adv_lin_no_dealias
Subroutines to add perturbed advection terms to the RHS of a transport equation.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with no dealiasing applied.
Base abstract type for computing the advection operator.