Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
adv_adjoint_no_dealias.f90
Go to the documentation of this file.
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only: rp
38 use math, only: subcol3, rzero
39 use space, only: space_t
40 use field, only: field_t
41 use coefs, only: coef_t
42 use scratch_registry, only: neko_scratch_registry
43 use neko_config, only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
44 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
45 use operators, only: opgrad, conv1, cdtp
46 use device_math, only: device_vdot3, device_sub2, device_add4, &
47 device_col3, device_subcol3, device_rzero
48 use device, only: device_free, device_map, device_get_ptr
49 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
50 c_associated
51 implicit none
52 private
53
55 type, public, extends(advection_adjoint_t) :: adv_lin_no_dealias_t
56 real(kind=rp), allocatable :: temp(:)
57 type(c_ptr) :: temp_d = c_null_ptr
58 contains
62 procedure, pass(this) :: compute_linear => linear_advection_no_dealias
68 procedure, pass(this) :: compute_adjoint => adjoint_advection_no_dealias
70 procedure, pass(this) :: init => init_no_dealias
72 procedure, pass(this) :: free => free_no_dealias
74
75contains
76
79 subroutine init_no_dealias(this, coef)
80 class(adv_lin_no_dealias_t), intent(inout) :: this
81 type(coef_t), intent(in) :: coef
82
83 allocate(this%temp(coef%dof%size()))
84
85 if (neko_bcknd_device .eq. 1) then
86 call device_map(this%temp, this%temp_d, coef%dof%size())
87 end if
88
89 end subroutine init_no_dealias
90
92 subroutine free_no_dealias(this)
93 class(adv_lin_no_dealias_t), intent(inout) :: this
94
95 if (allocated(this%temp)) then
96 deallocate(this%temp)
97 end if
98 if (c_associated(this%temp_d)) then
99 call device_free(this%temp_d)
100 end if
101 end subroutine free_no_dealias
102
119 subroutine adjoint_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
120 fy, fz, Xh, coef, n)
121 implicit none
122 class(adv_lin_no_dealias_t), intent(inout) :: this
123 type(space_t), intent(inout) :: Xh
124 type(coef_t), intent(inout) :: coef
125 type(field_t), intent(inout) :: vx, vy, vz
126 type(field_t), intent(inout) :: vxb, vyb, vzb
127 integer, intent(in) :: n
128 type(field_t), intent(inout) :: fx, fy, fz
129 type(c_ptr) :: fx_d, fy_d, fz_d
130 type(c_ptr) :: vx_d, vy_d, vz_d
131
132 ! these are gradients of U_b (one element)
133 real(kind=rp), dimension(Xh%lxyz) :: duxb, dvxb, dwxb
134 real(kind=rp), dimension(Xh%lxyz) :: duyb, dvyb, dwyb
135 real(kind=rp), dimension(Xh%lxyz) :: duzb, dvzb, dwzb
136 ! temporary arrays
137 integer :: e, i, idx, idxx
138
139
140 type(field_t), pointer :: tduxb, tdvxb, tdwxb, tduyb, tdvyb, tdwyb, tduzb, &
141 tdvzb, tdwzb
142 integer :: temp_indices(9)
143
144
145 if (neko_bcknd_device .eq. 1) then
146 call neko_scratch_registry%request_field(tduxb, temp_indices(1))
147 call neko_scratch_registry%request_field(tdvxb, temp_indices(2))
148 call neko_scratch_registry%request_field(tdwxb, temp_indices(3))
149 call neko_scratch_registry%request_field(tduyb, temp_indices(4))
150 call neko_scratch_registry%request_field(tdvyb, temp_indices(5))
151 call neko_scratch_registry%request_field(tdwyb, temp_indices(6))
152 call neko_scratch_registry%request_field(tduzb, temp_indices(7))
153 call neko_scratch_registry%request_field(tdvzb, temp_indices(8))
154 call neko_scratch_registry%request_field(tdwzb, temp_indices(9))
155 fx_d = fx%x_d
156 fy_d = fy%x_d
157 fz_d = fz%x_d
158 vx_d = vx%x_d
159 vy_d = vy%x_d
160 vz_d = vz%x_d
161
162 ! u . grad U_b^T
163 !-----------------------------
164 ! take all the gradients
165 call opgrad(tduxb%x, tduyb%x, tduzb%x, vxb%x, coef)
166 call opgrad(tdvxb%x, tdvyb%x, tdvzb%x, vyb%x, coef)
167 call opgrad(tdwxb%x, tdwyb%x, tdwzb%x, vzb%x, coef)
168
169
170 ! traspose and multiply
171 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
172 tduxb%x_d, tdvxb%x_d, tdwxb%x_d, n)
173 call device_sub2(fx_d, this%temp_d, n)
174
175 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
176 tduyb%x_d, tdvyb%x_d, tdwyb%x_d, n)
177 call device_sub2(fy_d, this%temp_d, n)
178
179 call device_vdot3(this%temp_d, vx_d, vy_d, vz_d, &
180 tduzb%x_d, tdvzb%x_d, tdwzb%x_d, n)
181 call device_sub2(fz_d, this%temp_d, n)
182
183 ! \int \grad v . U_b ^ u
184 ! with '^' an outer product
185 call adjoint_weak_no_dealias_device(fx_d, vx_d, &
186 vxb%x, vyb%x, vzb%x, &
187 coef, xh, n, &
188 tduxb, tdvxb, tdwxb, &
189 tduyb, tdvyb, tdwyb)
190
191 call adjoint_weak_no_dealias_device(fy_d, vy_d, &
192 vxb%x, vyb%x, vzb%x, &
193 coef, xh, n, &
194 tduxb, tdvxb, tdwxb, &
195 tduyb, tdvyb, tdwyb)
196
197 call adjoint_weak_no_dealias_device(fz_d, vz_d, &
198 vxb%x, vyb%x, vzb%x, &
199 coef, xh, n, &
200 tduxb, tdvxb, tdwxb, &
201 tduyb, tdvyb, tdwyb)
202
203 call neko_scratch_registry%relinquish_field(temp_indices)
204 else
205 do e = 1, coef%msh%nelv
206 ! u . grad U_b^T
207 !-----------------------------
208 call opgrad(duxb, duyb, duzb, vxb%x, coef, e, e)
209 call opgrad(dvxb, dvyb, dvzb, vyb%x, coef, e, e)
210 call opgrad(dwxb, dwyb, dwzb, vzb%x, coef, e, e)
211
212 ! traspose and multiply
213 idx = (e - 1)*xh%lxyz + 1
214 do i = 1, xh%lxyz
215 idxx = idx + i
216 fx%x(idxx, 1, 1, 1) = fx%x(idxx, 1, 1, 1) - ( &
217 & vx%x(i,1,1,e)*duxb(i) + &
218 & vy%x(i,1,1,e)*dvxb(i) + &
219 & vz%x(i,1,1,e)*dwxb(i) )
220
221 fy%x(idxx, 1, 1, 1) = fy%x(idxx, 1, 1, 1) - ( &
222 & vx%x(i,1,1,e)*duyb(i) + &
223 & vy%x(i,1,1,e)*dvyb(i) + &
224 & vz%x(i,1,1,e)*dwyb(i))
225
226 fz%x(idxx, 1, 1, 1) = fz%x(idxx, 1, 1, 1) - ( &
227 & vx%x(i,1,1,e)*duzb(i) + &
228 & vy%x(i,1,1,e)*dvzb(i) + &
229 & vz%x(i,1,1,e)*dwzb(i))
230 end do
231
232 ! \int \grad v . U_b ^ u
233 ! with ^ an outer product
235 & fx%x(:,:,:,e), vx%x(1,1,1,e), &
236 & vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
237 & e, coef, xh, xh%lxyz, &
238 & duxb, dvxb, dwxb, duyb, dvyb, dwyb)
239
241 & fy%x(:,:,:,e), vy%x(1,1,1,e), &
242 & vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
243 & e, coef, xh, xh%lxyz, &
244 & duxb, dvxb, dwxb, duyb, dvyb, dwyb)
245
247 & fz%x(:,:,:,e), vz%x(1,1,1,e), &
248 & vxb%x(1,1,1,e), vyb%x(1,1,1,e), vzb%x(1,1,1,e), &
249 & e, coef, xh, xh%lxyz, &
250 & duxb, dvxb, dwxb, duyb, dvyb, dwyb)
251 end do
252
253 end if
254
255 end subroutine adjoint_advection_no_dealias
256
269 subroutine adjoint_weak_no_dealias_device(f_d, u_i_d, ub, vb, wb, coef, Xh, &
270 n, work1, work2, work3, w1, w2, w3)
271 integer, intent(in) :: n
272 type(c_ptr), intent(inout) :: f_d
273 type(c_ptr), intent(in) :: u_i_d
274 real(kind=rp), intent(inout), dimension(n) :: ub, vb, wb
275 type(field_t), intent(inout) :: w1, w2, w3
276 type(field_t), intent(inout) :: work1, work2, work3
277 type(space_t), intent(inout) :: Xh
278 type(coef_t), intent(inout) :: coef
279 type(c_ptr) :: ub_d, vb_d, wb_d
280 type(c_ptr) :: work1_d, work2_d, work3_d, w1_d, w2_d, w3_d
281
282 work1_d = work1%x_d
283 work2_d = work2%x_d
284 work3_d = work3%x_d
285 w1_d = w1%x_d
286 w2_d = w2%x_d
287 w3_d = w3%x_d
288 ub_d = device_get_ptr(ub)
289 vb_d = device_get_ptr(vb)
290 wb_d = device_get_ptr(wb)
291
292 ! outer product
293 call device_col3(work1_d, u_i_d, ub_d, n)
294 call device_col3(work2_d, u_i_d, vb_d, n)
295 call device_col3(work3_d, u_i_d, wb_d, n)
296 ! D^T
297 call cdtp(w1%x, work1%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
298 call cdtp(w2%x, work2%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
299 call cdtp(w3%x, work3%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
300 ! sum them
301 call device_add4(work1_d, w1_d, w2_d, w3_d , n)
302 call device_sub2(f_d, work1_d, n)
303 end subroutine adjoint_weak_no_dealias_device
304
317 subroutine adjoint_weak_no_dealias_cpu(f, u_i, ub, vb, wb, e, coef, Xh, n, &
318 work1, work2, work3, w1, w2, w3)
319 implicit none
320 integer, intent(in) :: e, n
321 integer :: i
322 real(kind=rp), intent(inout), dimension(n) :: f
323 real(kind=rp), intent(inout), dimension(n) :: u_i
324 real(kind=rp), intent(inout), dimension(n) :: ub, vb, wb
325 real(kind=rp), intent(inout), dimension(n) :: w1, w2, w3
326 real(kind=rp), intent(inout), dimension(n) :: work1, work2, work3
327 type(space_t), intent(inout) :: xh
328 type(coef_t), intent(inout) :: coef
329
330 ! outer product
331 do i = 1, xh%lxyz
332 work1(i) = u_i(i)*ub(i)
333 work2(i) = u_i(i)*vb(i)
334 work3(i) = u_i(i)*wb(i)
335 end do
336
337 ! D^T
338 call cdtp(w1, work1, coef%drdx, coef%dsdx, coef%dtdx, coef, e ,e)
339 call cdtp(w2, work2, coef%drdy, coef%dsdy, coef%dtdy, coef, e ,e)
340 call cdtp(w3, work3, coef%drdz, coef%dsdz, coef%dtdz, coef, e, e)
341
342 ! sum them
343 do i = 1, xh%lxyz
344 f(i) = f(i) - w1(i) + w2(i) + w3(i)
345 end do
346 end subroutine adjoint_weak_no_dealias_cpu
347
348
349
365 subroutine linear_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, &
366 fy, fz, Xh, coef, n)
367 implicit none
368 class(adv_lin_no_dealias_t), intent(inout) :: this
369 type(space_t), intent(inout) :: Xh
370 type(coef_t), intent(inout) :: coef
371 type(field_t), intent(inout) :: vx, vy, vz
372 type(field_t), intent(inout) :: vxb, vyb, vzb
373 integer, intent(in) :: n
374 type(field_t), intent(inout) :: fx, fy, fz
375 type(c_ptr) :: fx_d, fy_d, fz_d
376
377 if (neko_bcknd_device .eq. 1) then
378 fx_d = fx%x_d
379 fy_d = fy%x_d
380 fz_d = fz%x_d
381
382 ! (x)
383 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
384 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
385 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
386 call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
387
388 ! (y)
389 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
390 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
391 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
392 call device_subcol3 (fy_d, coef%B_d, this%temp_d, n)
393
394 ! (z)
395 if (coef%Xh%lz .eq. 1) then
396 call device_rzero (this%temp_d, n)
397 else
398 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
399 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
400 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
401 call device_subcol3(fz_d, coef%B_d, this%temp_d, n)
402 end if
403 else
404 ! (x)
405 call conv1(this%temp, vx%x, vxb%x, vyb%x, vzb%x, xh, coef)
406 call subcol3 (fx%x, coef%B, this%temp, n)
407 call conv1(this%temp, vxb%x, vx%x, vy%x, vz%x, xh, coef)
408 call subcol3 (fx%x, coef%B, this%temp, n)
409
410 ! (y)
411 call conv1(this%temp, vy%x, vxb%x, vyb%x, vzb%x, xh, coef)
412 call subcol3 (fy%x, coef%B, this%temp, n)
413 call conv1(this%temp, vyb%x, vx%x, vy%x, vz%x, xh, coef)
414 call subcol3 (fy%x, coef%B, this%temp, n)
415
416 ! (z)
417 if (coef%Xh%lz .eq. 1) then
418 call rzero (this%temp, n)
419 else
420 call conv1(this%temp, vz%x, vxb%x, vyb%x, vzb%x, xh, coef)
421 call subcol3(fz%x, coef%B, this%temp, n)
422 call conv1(this%temp, vzb%x, vx%x, vy%x, vz%x, xh, coef)
423 call subcol3(fz%x, coef%B, this%temp, n)
424 end if
425 end if
426
427 end subroutine linear_advection_no_dealias
428end module adv_lin_no_dealias
Subroutines to add perturbed advection terms to the RHS of a transport equation.
subroutine adjoint_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, fy, fz, xh, coef, n)
Add the adjoint advection term for the fluid in weak form, i.e. , to the RHS.
subroutine adjoint_weak_no_dealias_device(f_d, u_i_d, ub, vb, wb, coef, xh, n, work1, work2, work3, w1, w2, w3)
Compute a single component of , to the RHS on device.
subroutine free_no_dealias(this)
Destructor.
subroutine linear_advection_no_dealias(this, vx, vy, vz, vxb, vyb, vzb, fx, fy, fz, xh, coef, n)
Add the linearized advection term for the fluid, i.e. , to the RHS.
subroutine adjoint_weak_no_dealias_cpu(f, u_i, ub, vb, wb, e, coef, xh, n, work1, work2, work3, w1, w2, w3)
Compute a single component of , to the RHS on CPU.
subroutine init_no_dealias(this, coef)
Constructor.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with no dealiasing applied.
Base abstract type for computing the advection operator.