Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
adv_adjoint_dealias.f90
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
36 use num_types, only: rp
37 use math, only: vdot3, sub2
38 use space, only: space_t, gl
39 use field, only: field_t
40 use coefs, only: coef_t
41 use neko_config, only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
42 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
43 use operators, only: opgrad, cdtp
44 use interpolation, only: interpolator_t
45 use device_math, only: device_vdot3, device_sub2, device_col3, device_add4
46 use device, only: device_map
47 use utils, only: neko_error
48 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr
49 implicit none
50 private
51
53 type, public, extends(advection_adjoint_t) :: adv_lin_dealias_t
55 type(coef_t) :: coef_gl
57 type(coef_t), pointer :: coef_gll
59 type(interpolator_t) :: gll_to_gl
61 type(space_t) :: xh_gl
63 type(space_t), pointer :: xh_gll
64 real(kind=rp), allocatable :: temp(:), tbf(:)
66 real(kind=rp), allocatable :: tx(:), ty(:), tz(:)
67 real(kind=rp), allocatable :: vr(:), vs(:), vt(:)
69 type(c_ptr) :: temp_d = c_null_ptr
71 type(c_ptr) :: tbf_d = c_null_ptr
73 type(c_ptr) :: tx_d = c_null_ptr
75 type(c_ptr) :: ty_d = c_null_ptr
77 type(c_ptr) :: tz_d = c_null_ptr
79 type(c_ptr) :: vr_d = c_null_ptr
81 type(c_ptr) :: vs_d = c_null_ptr
83 type(c_ptr) :: vt_d = c_null_ptr
84
85 real(kind=rp), allocatable :: txb(:), tyb(:), tzb(:)
87 type(c_ptr) :: txb_d = c_null_ptr
89 type(c_ptr) :: tyb_d = c_null_ptr
91 type(c_ptr) :: tzb_d = c_null_ptr
92
93 real(kind=rp), allocatable :: duxb(:), duyb(:), duzb(:)
94 real(kind=rp), allocatable :: dvxb(:), dvyb(:), dvzb(:)
95 real(kind=rp), allocatable :: dwxb(:), dwyb(:), dwzb(:)
97 type(c_ptr) :: duxb_d = c_null_ptr
99 type(c_ptr) :: duyb_d = c_null_ptr
101 type(c_ptr) :: duzb_d = c_null_ptr
103 type(c_ptr) :: dvxb_d = c_null_ptr
105 type(c_ptr) :: dvyb_d = c_null_ptr
107 type(c_ptr) :: dvzb_d = c_null_ptr
109 type(c_ptr) :: dwxb_d = c_null_ptr
111 type(c_ptr) :: dwyb_d = c_null_ptr
113 type(c_ptr) :: dwzb_d = c_null_ptr
114
115 contains
119 procedure, pass(this) :: compute_linear => &
120 compute_linear_advection_dealias
126 procedure, pass(this) :: compute_adjoint => &
127 compute_adjoint_advection_dealias
129 ! If one integrates by parts, this essentially switches sign and adds some
130 ! boundary terms.
131 ! We keep the differential operator on the test function
132 procedure, pass(this) :: compute_adjoint_scalar => &
133 compute_adjoint_scalar_advection_dealias
134 ! NOTE
135 ! This linearized advection term is the same as a normal advection term
136 ! so not sure what to do here...
138 procedure, pass(this) :: init => init_dealias
140 procedure, pass(this) :: free => free_dealias
141 end type adv_lin_dealias_t
142
143contains
144
149 subroutine init_dealias(this, lxd, coef)
150 class(adv_lin_dealias_t), target, intent(inout) :: this
151 integer, intent(in) :: lxd
152 type(coef_t), intent(inout), target :: coef
153 integer :: nel, n_GL, n
154
155 call this%Xh_GL%init(gl, lxd, lxd, lxd)
156 this%Xh_GLL => coef%Xh
157 this%coef_GLL => coef
158 call this%GLL_to_GL%init(this%Xh_GL, this%Xh_GLL)
159
160 call this%coef_GL%init(this%Xh_GL, coef%msh)
161
162 nel = coef%msh%nelv
163 n_gl = nel*this%Xh_GL%lxyz
164 n = nel*coef%Xh%lxyz
165 call this%GLL_to_GL%map(this%coef_GL%drdx, coef%drdx, nel, this%Xh_GL)
166 call this%GLL_to_GL%map(this%coef_GL%dsdx, coef%dsdx, nel, this%Xh_GL)
167 call this%GLL_to_GL%map(this%coef_GL%dtdx, coef%dtdx, nel, this%Xh_GL)
168 call this%GLL_to_GL%map(this%coef_GL%drdy, coef%drdy, nel, this%Xh_GL)
169 call this%GLL_to_GL%map(this%coef_GL%dsdy, coef%dsdy, nel, this%Xh_GL)
170 call this%GLL_to_GL%map(this%coef_GL%dtdy, coef%dtdy, nel, this%Xh_GL)
171 call this%GLL_to_GL%map(this%coef_GL%drdz, coef%drdz, nel, this%Xh_GL)
172 call this%GLL_to_GL%map(this%coef_GL%dsdz, coef%dsdz, nel, this%Xh_GL)
173 call this%GLL_to_GL%map(this%coef_GL%dtdz, coef%dtdz, nel, this%Xh_GL)
174
175 if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
176 (neko_bcknd_opencl .eq. 1) .or. (neko_bcknd_sx .eq. 1) .or. &
177 (neko_bcknd_xsmm .eq. 1)) then
178 allocate(this%temp(n_gl))
179 allocate(this%tbf(n_gl))
180 allocate(this%tx(n_gl))
181 allocate(this%ty(n_gl))
182 allocate(this%tz(n_gl))
183 allocate(this%vr(n_gl))
184 allocate(this%vs(n_gl))
185 allocate(this%vt(n_gl))
186 allocate(this%duxb(n_gl))
187 allocate(this%duyb(n_gl))
188 allocate(this%duzb(n_gl))
189 allocate(this%dvxb(n_gl))
190 allocate(this%dvyb(n_gl))
191 allocate(this%dvzb(n_gl))
192 allocate(this%dwxb(n_gl))
193 allocate(this%dwyb(n_gl))
194 allocate(this%dwzb(n_gl))
195 allocate(this%txb(n_gl))
196 allocate(this%tyb(n_gl))
197 allocate(this%tzb(n_gl))
198 end if
199
200 if (neko_bcknd_device .eq. 1) then
201 call device_map(this%temp, this%temp_d, n_gl)
202 call device_map(this%tbf, this%tbf_d, n_gl)
203 call device_map(this%tx, this%tx_d, n_gl)
204 call device_map(this%ty, this%ty_d, n_gl)
205 call device_map(this%tz, this%tz_d, n_gl)
206 call device_map(this%vr, this%vr_d, n_gl)
207 call device_map(this%vs, this%vs_d, n_gl)
208 call device_map(this%vt, this%vt_d, n_gl)
209 call device_map(this%duxb, this%duxb_d, n_gl)
210 call device_map(this%duyb, this%duyb_d, n_gl)
211 call device_map(this%duzb, this%duzb_d, n_gl)
212 call device_map(this%dvxb, this%dvxb_d, n_gl)
213 call device_map(this%dvyb, this%dvyb_d, n_gl)
214 call device_map(this%dvzb, this%dvzb_d, n_gl)
215 call device_map(this%dwxb, this%dwxb_d, n_gl)
216 call device_map(this%dwyb, this%dwyb_d, n_gl)
217 call device_map(this%dwzb, this%dwzb_d, n_gl)
218 call device_map(this%txb, this%txb_d, n_gl)
219 call device_map(this%tyb, this%tyb_d, n_gl)
220 call device_map(this%tzb, this%tzb_d, n_gl)
221 end if
222
223 end subroutine init_dealias
224
226 subroutine free_dealias(this)
227 class(adv_lin_dealias_t), intent(inout) :: this
228 end subroutine free_dealias
229
230
248 subroutine compute_adjoint_advection_dealias(this, vx, vy, vz, vxb, vyb, &
249 vzb, fx, fy, fz, Xh, coef, n)
250 !! HARRY added vxb etc for baseflow
251 implicit none
252 class(adv_lin_dealias_t), intent(inout) :: this
253 type(space_t), intent(inout) :: Xh
254 type(coef_t), intent(inout) :: coef
255 type(field_t), intent(inout) :: vx, vy, vz
256 type(field_t), intent(inout) :: vxb, vyb, vzb
257 type(field_t), intent(inout) :: fx, fy, fz
258 integer, intent(in) :: n
259 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
260
261 ! u and U_b on dealiased mesh (one element)
262 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
263 real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
264
265 ! gradients of U_b on dealiased mesh (one element)
266 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duxb, dvxb, dwxb
267 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duyb, dvyb, dwyb
268 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duzb, dvzb, dwzb
269
270 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
271 integer :: e, i, idx, nel, n_GL
272
273 nel = coef%msh%nelv
274 n_gl = nel * this%Xh_GL%lxyz
275 associate(c_gl => this%coef_GL)
276
277 if (neko_bcknd_device .eq. 1) then
278 ! Map baseflow to GL
279 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
280 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
281 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
282
283 ! Map adjoint velocity to GL
284 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
285 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
286 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
287
288
289 ! u . grad U_b^T
290 !-----------------------------
291 ! take all the gradients
292 call opgrad(this%duxb, this%duyb, this%duzb, this%txb, c_gl)
293 call opgrad(this%dvxb, this%dvyb, this%dvzb, this%tyb, c_gl)
294 call opgrad(this%dwxb, this%dwyb, this%dwzb, this%tzb, c_gl)
295
296 ! traspose and multiply
297 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
298 this%duxb_d, this%dvxb_d, this%dwxb_d, n_gl)
299 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
300 call device_sub2(fx%x_d, this%temp_d, n)
301
302 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
303 this%duyb_d, this%dvyb_d, this%dwyb_d, n_gl)
304 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
305 call device_sub2(fy%x_d, this%temp_d, n)
306
307 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
308 this%duzb_d, this%dvzb_d, this%dwzb_d, n_gl)
309 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
310 call device_sub2(fz%x_d, this%temp_d, n)
311
312 ! \int \grad v . U_b ^ u with ^ an outer product
313
314 ! (x)
315 ! duxb,duyb,duzb are temporary arrays
316 call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
317 call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
318 call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
319
320 ! D^T
321 ! vr,vs,vt are temporary arrays
322 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
323 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
324 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
325
326 ! reuse duxb as a temp
327 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
328 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
329 call device_sub2(fx%x_d, this%temp_d, n)
330
331
332 ! (y)
333 ! duxb,duyb,duzb are temporary arrays
334 call device_col3(this%duxb_d, this%ty_d, this%txb_d, n_gl)
335 call device_col3(this%duyb_d, this%ty_d, this%tyb_d, n_gl)
336 call device_col3(this%duzb_d, this%ty_d, this%tzb_d, n_gl)
337
338 ! D^T
339 ! vr,vs,vt are temporary arrays
340 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
341 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
342 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
343
344 ! reuse duxb as a temp
345 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
346 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
347 call device_sub2(fy%x_d, this%temp_d, n)
348
349 ! (z)
350 ! duxb,duyb,duzb are temporary arrays
351 call device_col3(this%duxb_d, this%tz_d, this%txb_d, n_gl)
352 call device_col3(this%duyb_d, this%tz_d, this%tyb_d, n_gl)
353 call device_col3(this%duzb_d, this%tz_d, this%tzb_d, n_gl)
354
355 ! D^T
356 ! vr,vs,vt are temporary arrays
357 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
358 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
359 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
360
361 ! reuse duxb as a temp
362 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
363 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
364 call device_sub2(fz%x_d, this%temp_d, n)
365 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
366 !TODO
367 else
368
369
370
371 do e = 1, coef%msh%nelv
372 ! Map baseflow to GL
373 call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
374 call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
375 call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
376
377 ! Map adjoint velocity to GL
378 call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
379 call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
380 call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
381
382
383 ! u . grad U_b^T
384 !-----------------------------
385 call opgrad(duxb, duyb, duzb, txb, c_gl, e, e)
386 call opgrad(dvxb, dvyb, dvzb, tyb, c_gl, e, e)
387 call opgrad(dwxb, dwyb, dwzb, tzb, c_gl, e, e)
388
389 ! traspose and multiply
390 do i = 1, this%Xh_GL%lxyz
391 tfx(i) = tx(i)*duxb(i) + ty(i)*dvxb(i) + tz(i)*dwxb(i)
392 tfy(i) = tx(i)*duyb(i) + ty(i)*dvyb(i) + tz(i)*dwyb(i)
393 tfz(i) = tx(i)*duzb(i) + ty(i)*dvzb(i) + tz(i)*dwzb(i)
394 end do
395
396 ! map back to GLL
397 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
398 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
399 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
400
401 ! accumulate
402 idx = (e-1)*this%Xh_GLL%lxyz+1
403 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
404 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
405 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
406
407 ! (x)
408 do i = 1, this%Xh_GL%lxyz
409 duxb(i) = tx(i)*txb(i)
410 duyb(i) = tx(i)*tyb(i)
411 duzb(i) = tx(i)*tzb(i)
412 end do
413
414 ! D^T
415 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
416 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
417 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
418
419 ! sum them
420 do i = 1, this%Xh_GL%lxyz
421 tfx(i) = tfx(i) + tfy(i) + tfz(i)
422 end do
423
424 ! map back to GLL
425 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
426 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
427
428 ! (y)
429 do i = 1, this%Xh_GL%lxyz
430 duxb(i) = ty(i)*txb(i)
431 duyb(i) = ty(i)*tyb(i)
432 duzb(i) = ty(i)*tzb(i)
433 end do
434
435 ! D^T
436 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
437 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
438 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
439
440 ! sum them
441 do i = 1, this%Xh_GL%lxyz
442 tfx(i) = tfx(i) + tfy(i) + tfz(i)
443 end do
444
445 ! map back to GLL
446 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
447 call sub2(fy%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
448
449 ! (z)
450 do i = 1, this%Xh_GL%lxyz
451 duxb(i) = tz(i)*txb(i)
452 duyb(i) = tz(i)*tyb(i)
453 duzb(i) = tz(i)*tzb(i)
454 end do
455 ! D^T
456 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
457 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
458 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
459
460 ! sum them
461 do i = 1, this%Xh_GL%lxyz
462 tfx(i) = tfx(i) + tfy(i) + tfz(i)
463 end do
464
465 ! map back to GLL
466 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
467 call sub2(fz%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
468
469 end do
470
471
472 end if
473 end associate
474
475 end subroutine compute_adjoint_advection_dealias
476
493 subroutine compute_linear_advection_dealias(this, vx, vy, vz, vxb, vyb, vzb, &
494 fx, fy, fz, Xh, coef, n)
495 implicit none
496 class(adv_lin_dealias_t), intent(inout) :: this
497 type(space_t), intent(inout) :: Xh
498 type(coef_t), intent(inout) :: coef
499 type(field_t), intent(inout) :: vx, vy, vz
500 type(field_t), intent(inout) :: vxb, vyb, vzb
501 integer, intent(in) :: n
502 type(field_t), intent(inout) :: fx, fy, fz
503 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
504 real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
505 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
506 real(kind=rp), dimension(this%Xh_GL%lxyz) :: vr, vs, vt
507 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
508
509 integer :: e, i, idx, nel, n_GL
510 nel = coef%msh%nelv
511 n_gl = nel * this%Xh_GL%lxyz
512
513 !This is extremely primitive and unoptimized on the device //Karp
514 associate(c_gl => this%coef_GL)
515
516 if (neko_bcknd_device .eq. 1) then
517 ! Map baseflow to GL
518 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
519 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
520 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
521
522 ! Map perturbed velocity to GL
523 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
524 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
525 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
526
527 ! u'.grad U
528 call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
529 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
530 this%tx_d, this%ty_d, this%tz_d, n_gl)
531 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
532 call device_sub2(fx%x_d, this%temp_d, n)
533
534
535 call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
536 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
537 this%tx_d, this%ty_d, this%tz_d, n_gl)
538 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
539 call device_sub2(fy%x_d, this%temp_d, n)
540
541 call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
542 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
543 this%tx_d, this%ty_d, this%tz_d, n_gl)
544 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
545 call device_sub2(fz%x_d, this%temp_d, n)
546
547 ! U.grad u'
548 call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
549 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
550 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
551 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
552 call device_sub2(fx%x_d, this%temp_d, n)
553
554
555 call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
556 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
557 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
558 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
559 call device_sub2(fy%x_d, this%temp_d, n)
560
561 call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
562 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
563 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
564 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
565 call device_sub2(fz%x_d, this%temp_d, n)
566
567 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
568 ! Map baseflow to GL
569 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
570 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
571 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
572
573 ! Map perturbed velocity to GL
574 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
575 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
576 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
577
578 ! u'.grad U
579 call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
580 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
581 this%tx, this%ty, this%tz, n_gl)
582 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
583 call sub2(fx%x, this%temp, n)
584
585
586 call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
587 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
588 this%tx, this%ty, this%tz, n_gl)
589 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
590 call sub2(fy%x, this%temp, n)
591
592 call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
593 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
594 this%tx, this%ty, this%tz, n_gl)
595 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
596 call sub2(fz%x, this%temp, n)
597
598 ! U.grad u'
599 call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
600 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
601 this%txb, this%tyb, this%tzb, n_gl)
602 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
603 call sub2(fx%x, this%temp, n)
604
605
606 call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
607 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
608 this%txb, this%tyb, this%tzb, n_gl)
609 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
610 call sub2(fy%x, this%temp, n)
611
612 call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
613 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
614 this%txb, this%tyb, this%tzb, n_gl)
615 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
616 call sub2(fz%x, this%temp, n)
617 else
618
619 do e = 1, coef%msh%nelv
620 ! Map baseflow to GL
621 call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
622 call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
623 call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
624 ! Map perturbed velocity to GL
625 call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
626 call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
627 call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
628
629 ! u'.grad U
630 call opgrad(vr, vs, vt, txb, c_gl, e, e)
631 do i = 1, this%Xh_GL%lxyz
632 tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
633 end do
634
635 call opgrad(vr, vs, vt, tyb, c_gl, e, e)
636 do i = 1, this%Xh_GL%lxyz
637 tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
638 end do
639
640 call opgrad(vr, vs, vt, tzb, c_gl, e, e)
641 do i = 1, this%Xh_GL%lxyz
642 tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
643 end do
644
645 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
646 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
647 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
648
649 idx = (e-1)*this%Xh_GLL%lxyz+1
650 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
651 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
652 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
653
654 ! U.grad u'
655 call opgrad(vr, vs, vt, tx, c_gl, e, e)
656 do i = 1, this%Xh_GL%lxyz
657 tfx(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
658 end do
659
660 call opgrad(vr, vs, vt, ty, c_gl, e, e)
661 do i = 1, this%Xh_GL%lxyz
662 tfy(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
663 end do
664
665 call opgrad(vr, vs, vt, tz, c_gl, e, e)
666 do i = 1, this%Xh_GL%lxyz
667 tfz(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
668 end do
669
670 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
671 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
672 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
673
674 idx = (e-1)*this%Xh_GLL%lxyz+1
675 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
676 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
677 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
678 end do
679 end if
680 end associate
681 end subroutine compute_linear_advection_dealias
682
697 subroutine compute_adjoint_scalar_advection_dealias(this, vxb, vyb, vzb, s, &
698 fs, Xh, coef, n, dt)
699 class(adv_lin_dealias_t), intent(inout) :: this
700 type(field_t), intent(inout) :: vxb, vyb, vzb
701 type(field_t), intent(inout) :: s
702 type(field_t), intent(inout) :: fs
703 type(space_t), intent(inout) :: Xh
704 type(coef_t), intent(inout) :: coef
705 integer, intent(in) :: n
706 real(kind=rp), intent(in), optional :: dt
707
708 real(kind=rp), dimension(this%Xh_GL%lxyz) :: vx_gl, vy_gl, vz_gl, s_gl
709 real(kind=rp), dimension(this%Xh_GL%lxyz) :: work1, work2, work3
710 real(kind=rp), dimension(this%Xh_GL%lxyz) :: w1, w2, w3
711 real(kind=rp), dimension(this%Xh_GL%lxyz) :: f_gl
712 integer :: e, i, idx, nel, n_GL
713 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: temp
714
715 nel = coef%msh%nelv
716 n_gl = nel * this%Xh_GL%lxyz
717
718 associate(c_gl => this%coef_GL)
719 if (neko_bcknd_device .eq. 1) then
720 ! Map baseflow to GL
721 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
722 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
723 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
724
725 ! Map adjoint scalar to GL (use tx as adjoint scalar array)
726 call this%GLL_to_GL%map(this%tx, s%x, nel, this%Xh_GL)
727
728 ! Outer product (use duxb, duyb, duzb as temporary arrays)
729 call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
730 call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
731 call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
732
733 ! D^T
734 ! vr,vs,vt are temporary arrays
735 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
736 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
737 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
738
739 ! reuse duxb as a temp for summing them
740 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
741
742 ! map back to GLL
743 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
744
745 !apply
746 call device_sub2(fs%x_d, this%temp_d, n)
747
748
749 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
750 call neko_error("Adjoint scalar not implemented for SX")
751 else
752 do e = 1, coef%msh%nelv
753 ! Map baseflow to GL
754 call this%GLL_to_GL%map(vx_gl, vxb%x(1,1,1,e), 1, this%Xh_GL)
755 call this%GLL_to_GL%map(vy_gl, vyb%x(1,1,1,e), 1, this%Xh_GL)
756 call this%GLL_to_GL%map(vz_gl, vzb%x(1,1,1,e), 1, this%Xh_GL)
757
758 ! Map passive scalar velocity to GL
759 call this%GLL_to_GL%map(s_gl, s%x(1,1,1,e), 1, this%Xh_GL)
760
761 do i = 1, this%Xh_GL%lxyz
762 work1(i) = s_gl(i)*vx_gl(i)
763 work2(i) = s_gl(i)*vy_gl(i)
764 work3(i) = s_gl(i)*vz_gl(i)
765 end do
766
767 ! D^T
768 call cdtp(w1, work1, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
769 call cdtp(w2, work2, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
770 call cdtp(w3, work3, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
771
772 ! sum them
773 do i = 1, this%Xh_GL%lxyz
774 f_gl(i) = w1(i) + w2(i) + w3(i)
775 end do
776
777 ! map back to GLL
778 idx = (e-1)*this%Xh_GLL%lxyz+1
779 call this%GLL_to_GL%map(temp, f_gl, 1, this%Xh_GLL)
780 call sub2(fs%x(idx, 1, 1, 1), temp, this%Xh_GLL%lxyz)
781
782 end do
783
784 end if
785 end associate
786
787 end subroutine compute_adjoint_scalar_advection_dealias
788end module adv_lin_dealias
Subroutines to add advection terms to the RHS of a transport equation.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with dealiasing.
Base abstract type for computing the advection operator.