Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
adv_adjoint_dealias.f90
1! Copyright (c) 2021-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
36 use num_types, only: rp
37 use math, only: vdot3, sub2
38 use space, only: space_t, gl
39 use field, only: field_t
40 use coefs, only: coef_t
41 use neko_config, only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
42 neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
43 use operators, only: opgrad, cdtp
44 use interpolation, only: interpolator_t
45 use device_math, only: device_vdot3, device_sub2, device_col3, device_add4
46 use device, only: device_map
47 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr
48 implicit none
49 private
50
52 type, public, extends(advection_adjoint_t) :: adv_lin_dealias_t
54 type(coef_t) :: coef_gl
56 type(coef_t), pointer :: coef_gll
58 type(interpolator_t) :: gll_to_gl
60 type(space_t) :: xh_gl
62 type(space_t), pointer :: xh_gll
63 real(kind=rp), allocatable :: temp(:), tbf(:)
65 real(kind=rp), allocatable :: tx(:), ty(:), tz(:)
66 real(kind=rp), allocatable :: vr(:), vs(:), vt(:)
68 type(c_ptr) :: temp_d = c_null_ptr
70 type(c_ptr) :: tbf_d = c_null_ptr
72 type(c_ptr) :: tx_d = c_null_ptr
74 type(c_ptr) :: ty_d = c_null_ptr
76 type(c_ptr) :: tz_d = c_null_ptr
78 type(c_ptr) :: vr_d = c_null_ptr
80 type(c_ptr) :: vs_d = c_null_ptr
82 type(c_ptr) :: vt_d = c_null_ptr
83
84 real(kind=rp), allocatable :: txb(:), tyb(:), tzb(:)
86 type(c_ptr) :: txb_d = c_null_ptr
88 type(c_ptr) :: tyb_d = c_null_ptr
90 type(c_ptr) :: tzb_d = c_null_ptr
91
92 real(kind=rp), allocatable :: duxb(:), duyb(:), duzb(:)
93 real(kind=rp), allocatable :: dvxb(:), dvyb(:), dvzb(:)
94 real(kind=rp), allocatable :: dwxb(:), dwyb(:), dwzb(:)
96 type(c_ptr) :: duxb_d = c_null_ptr
98 type(c_ptr) :: duyb_d = c_null_ptr
100 type(c_ptr) :: duzb_d = c_null_ptr
102 type(c_ptr) :: dvxb_d = c_null_ptr
104 type(c_ptr) :: dvyb_d = c_null_ptr
106 type(c_ptr) :: dvzb_d = c_null_ptr
108 type(c_ptr) :: dwxb_d = c_null_ptr
110 type(c_ptr) :: dwyb_d = c_null_ptr
112 type(c_ptr) :: dwzb_d = c_null_ptr
113
114 contains
118 procedure, pass(this) :: compute_linear => &
119 compute_linear_advection_dealias
125 procedure, pass(this) :: compute_adjoint => &
126 compute_adjoint_advection_dealias
128 ! If one integrates by parts, this essentially switches sign and adds some
129 ! boundary terms.
130 ! We keep the differential operator on the test function
131 procedure, pass(this) :: compute_adjoint_scalar => &
132 compute_adjoint_scalar_advection_dealias
133 ! NOTE
134 ! This linearized advection term is the same as a normal advection term
135 ! so not sure what to do here...
137 procedure, pass(this) :: init => init_dealias
139 procedure, pass(this) :: free => free_dealias
140 end type adv_lin_dealias_t
141
142contains
143
148 subroutine init_dealias(this, lxd, coef)
149 class(adv_lin_dealias_t), target, intent(inout) :: this
150 integer, intent(in) :: lxd
151 type(coef_t), intent(inout), target :: coef
152 integer :: nel, n_GL, n
153
154 call this%Xh_GL%init(gl, lxd, lxd, lxd)
155 this%Xh_GLL => coef%Xh
156 this%coef_GLL => coef
157 call this%GLL_to_GL%init(this%Xh_GL, this%Xh_GLL)
158
159 call this%coef_GL%init(this%Xh_GL, coef%msh)
160
161 nel = coef%msh%nelv
162 n_gl = nel*this%Xh_GL%lxyz
163 n = nel*coef%Xh%lxyz
164 call this%GLL_to_GL%map(this%coef_GL%drdx, coef%drdx, nel, this%Xh_GL)
165 call this%GLL_to_GL%map(this%coef_GL%dsdx, coef%dsdx, nel, this%Xh_GL)
166 call this%GLL_to_GL%map(this%coef_GL%dtdx, coef%dtdx, nel, this%Xh_GL)
167 call this%GLL_to_GL%map(this%coef_GL%drdy, coef%drdy, nel, this%Xh_GL)
168 call this%GLL_to_GL%map(this%coef_GL%dsdy, coef%dsdy, nel, this%Xh_GL)
169 call this%GLL_to_GL%map(this%coef_GL%dtdy, coef%dtdy, nel, this%Xh_GL)
170 call this%GLL_to_GL%map(this%coef_GL%drdz, coef%drdz, nel, this%Xh_GL)
171 call this%GLL_to_GL%map(this%coef_GL%dsdz, coef%dsdz, nel, this%Xh_GL)
172 call this%GLL_to_GL%map(this%coef_GL%dtdz, coef%dtdz, nel, this%Xh_GL)
173
174 if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
175 (neko_bcknd_opencl .eq. 1) .or. (neko_bcknd_sx .eq. 1) .or. &
176 (neko_bcknd_xsmm .eq. 1)) then
177 allocate(this%temp(n_gl))
178 allocate(this%tbf(n_gl))
179 allocate(this%tx(n_gl))
180 allocate(this%ty(n_gl))
181 allocate(this%tz(n_gl))
182 allocate(this%vr(n_gl))
183 allocate(this%vs(n_gl))
184 allocate(this%vt(n_gl))
185 allocate(this%duxb(n_gl))
186 allocate(this%duyb(n_gl))
187 allocate(this%duzb(n_gl))
188 allocate(this%dvxb(n_gl))
189 allocate(this%dvyb(n_gl))
190 allocate(this%dvzb(n_gl))
191 allocate(this%dwxb(n_gl))
192 allocate(this%dwyb(n_gl))
193 allocate(this%dwzb(n_gl))
194 allocate(this%txb(n_gl))
195 allocate(this%tyb(n_gl))
196 allocate(this%tzb(n_gl))
197 end if
198
199 if (neko_bcknd_device .eq. 1) then
200 call device_map(this%temp, this%temp_d, n_gl)
201 call device_map(this%tbf, this%tbf_d, n_gl)
202 call device_map(this%tx, this%tx_d, n_gl)
203 call device_map(this%ty, this%ty_d, n_gl)
204 call device_map(this%tz, this%tz_d, n_gl)
205 call device_map(this%vr, this%vr_d, n_gl)
206 call device_map(this%vs, this%vs_d, n_gl)
207 call device_map(this%vt, this%vt_d, n_gl)
208 call device_map(this%duxb, this%duxb_d, n_gl)
209 call device_map(this%duyb, this%duyb_d, n_gl)
210 call device_map(this%duzb, this%duzb_d, n_gl)
211 call device_map(this%dvxb, this%dvxb_d, n_gl)
212 call device_map(this%dvyb, this%dvyb_d, n_gl)
213 call device_map(this%dvzb, this%dvzb_d, n_gl)
214 call device_map(this%dwxb, this%dwxb_d, n_gl)
215 call device_map(this%dwyb, this%dwyb_d, n_gl)
216 call device_map(this%dwzb, this%dwzb_d, n_gl)
217 call device_map(this%txb, this%txb_d, n_gl)
218 call device_map(this%tyb, this%tyb_d, n_gl)
219 call device_map(this%tzb, this%tzb_d, n_gl)
220 end if
221
222 end subroutine init_dealias
223
225 subroutine free_dealias(this)
226 class(adv_lin_dealias_t), intent(inout) :: this
227 end subroutine free_dealias
228
229
247 subroutine compute_adjoint_advection_dealias(this, vx, vy, vz, vxb, vyb, &
248 vzb, fx, fy, fz, Xh, coef, n)
249 !! HARRY added vxb etc for baseflow
250 implicit none
251 class(adv_lin_dealias_t), intent(inout) :: this
252 type(space_t), intent(inout) :: Xh
253 type(coef_t), intent(inout) :: coef
254 type(field_t), intent(inout) :: vx, vy, vz
255 type(field_t), intent(inout) :: vxb, vyb, vzb
256 type(field_t), intent(inout) :: fx, fy, fz
257 integer, intent(in) :: n
258 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
259
260 ! u and U_b on dealiased mesh (one element)
261 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
262 real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
263
264 ! gradients of U_b on dealiased mesh (one element)
265 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duxb, dvxb, dwxb
266 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duyb, dvyb, dwyb
267 real(kind=rp), dimension(this%Xh_GL%lxyz) :: duzb, dvzb, dwzb
268
269 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
270 integer :: e, i, idx, nel, n_GL
271
272 nel = coef%msh%nelv
273 n_gl = nel * this%Xh_GL%lxyz
274 associate(c_gl => this%coef_GL)
275
276 if (neko_bcknd_device .eq. 1) then
277 ! Map baseflow to GL
278 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
279 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
280 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
281
282 ! Map adjoint velocity to GL
283 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
284 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
285 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
286
287
288 ! u . grad U_b^T
289 !-----------------------------
290 ! take all the gradients
291 call opgrad(this%duxb, this%duyb, this%duzb, this%txb, c_gl)
292 call opgrad(this%dvxb, this%dvyb, this%dvzb, this%tyb, c_gl)
293 call opgrad(this%dwxb, this%dwyb, this%dwzb, this%tzb, c_gl)
294
295 ! traspose and multiply
296 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
297 this%duxb_d, this%dvxb_d, this%dwxb_d, n_gl)
298 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
299 call device_sub2(fx%x_d, this%temp_d, n)
300
301 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
302 this%duyb_d, this%dvyb_d, this%dwyb_d, n_gl)
303 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
304 call device_sub2(fy%x_d, this%temp_d, n)
305
306 call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
307 this%duzb_d, this%dvzb_d, this%dwzb_d, n_gl)
308 call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
309 call device_sub2(fz%x_d, this%temp_d, n)
310
311 ! \int \grad v . U_b ^ u with ^ an outer product
312
313 ! (x)
314 ! duxb,duyb,duzb are temporary arrays
315 call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
316 call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
317 call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
318
319 ! D^T
320 ! vr,vs,vt are temporary arrays
321 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
322 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
323 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
324
325 ! reuse duxb as a temp
326 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
327 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
328 call device_sub2(fx%x_d, this%temp_d, n)
329
330
331 ! (y)
332 ! duxb,duyb,duzb are temporary arrays
333 call device_col3(this%duxb_d, this%ty_d, this%txb_d, n_gl)
334 call device_col3(this%duyb_d, this%ty_d, this%tyb_d, n_gl)
335 call device_col3(this%duzb_d, this%ty_d, this%tzb_d, n_gl)
336
337 ! D^T
338 ! vr,vs,vt are temporary arrays
339 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
340 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
341 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
342
343 ! reuse duxb as a temp
344 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
345 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
346 call device_sub2(fy%x_d, this%temp_d, n)
347
348 ! (z)
349 ! duxb,duyb,duzb are temporary arrays
350 call device_col3(this%duxb_d, this%tz_d, this%txb_d, n_gl)
351 call device_col3(this%duyb_d, this%tz_d, this%tyb_d, n_gl)
352 call device_col3(this%duzb_d, this%tz_d, this%tzb_d, n_gl)
353
354 ! D^T
355 ! vr,vs,vt are temporary arrays
356 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
357 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
358 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
359
360 ! reuse duxb as a temp
361 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
362 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
363 call device_sub2(fz%x_d, this%temp_d, n)
364 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
365 !TODO
366 else
367
368
369
370 do e = 1, coef%msh%nelv
371 ! Map baseflow to GL
372 call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
373 call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
374 call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
375
376 ! Map adjoint velocity to GL
377 call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
378 call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
379 call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
380
381
382 ! u . grad U_b^T
383 !-----------------------------
384 call opgrad(duxb, duyb, duzb, txb, c_gl, e, e)
385 call opgrad(dvxb, dvyb, dvzb, tyb, c_gl, e, e)
386 call opgrad(dwxb, dwyb, dwzb, tzb, c_gl, e, e)
387
388 ! traspose and multiply
389 do i = 1, this%Xh_GL%lxyz
390 tfx(i) = tx(i)*duxb(i) + ty(i)*dvxb(i) + tz(i)*dwxb(i)
391 tfy(i) = tx(i)*duyb(i) + ty(i)*dvyb(i) + tz(i)*dwyb(i)
392 tfz(i) = tx(i)*duzb(i) + ty(i)*dvzb(i) + tz(i)*dwzb(i)
393 end do
394
395 ! map back to GLL
396 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
397 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
398 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
399
400 ! accumulate
401 idx = (e-1)*this%Xh_GLL%lxyz+1
402 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
403 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
404 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
405
406 ! (x)
407 do i = 1, this%Xh_GL%lxyz
408 duxb(i) = tx(i)*txb(i)
409 duyb(i) = tx(i)*tyb(i)
410 duzb(i) = tx(i)*tzb(i)
411 end do
412
413 ! D^T
414 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
415 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
416 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
417
418 ! sum them
419 do i = 1, this%Xh_GL%lxyz
420 tfx(i) = tfx(i) + tfy(i) + tfz(i)
421 end do
422
423 ! map back to GLL
424 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
425 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
426
427 ! (y)
428 do i = 1, this%Xh_GL%lxyz
429 duxb(i) = ty(i)*txb(i)
430 duyb(i) = ty(i)*tyb(i)
431 duzb(i) = ty(i)*tzb(i)
432 end do
433
434 ! D^T
435 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
436 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
437 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
438
439 ! sum them
440 do i = 1, this%Xh_GL%lxyz
441 tfx(i) = tfx(i) + tfy(i) + tfz(i)
442 end do
443
444 ! map back to GLL
445 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
446 call sub2(fy%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
447
448 ! (z)
449 do i = 1, this%Xh_GL%lxyz
450 duxb(i) = tz(i)*txb(i)
451 duyb(i) = tz(i)*tyb(i)
452 duzb(i) = tz(i)*tzb(i)
453 end do
454 ! D^T
455 call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
456 call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
457 call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
458
459 ! sum them
460 do i = 1, this%Xh_GL%lxyz
461 tfx(i) = tfx(i) + tfy(i) + tfz(i)
462 end do
463
464 ! map back to GLL
465 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
466 call sub2(fz%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
467
468 end do
469
470
471 end if
472 end associate
473
474 end subroutine compute_adjoint_advection_dealias
475
492 subroutine compute_linear_advection_dealias(this, vx, vy, vz, vxb, vyb, vzb, &
493 fx, fy, fz, Xh, coef, n)
494 implicit none
495 class(adv_lin_dealias_t), intent(inout) :: this
496 type(space_t), intent(inout) :: Xh
497 type(coef_t), intent(inout) :: coef
498 type(field_t), intent(inout) :: vx, vy, vz
499 type(field_t), intent(inout) :: vxb, vyb, vzb
500 integer, intent(in) :: n
501 type(field_t), intent(inout) :: fx, fy, fz
502 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
503 real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
504 real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
505 real(kind=rp), dimension(this%Xh_GL%lxyz) :: vr, vs, vt
506 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
507
508 integer :: e, i, idx, nel, n_GL
509 nel = coef%msh%nelv
510 n_gl = nel * this%Xh_GL%lxyz
511
512 !This is extremely primitive and unoptimized on the device //Karp
513 associate(c_gl => this%coef_GL)
514
515 if (neko_bcknd_device .eq. 1) then
516 ! Map baseflow to GL
517 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
518 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
519 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
520
521 ! Map perturbed velocity to GL
522 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
523 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
524 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
525
526 ! u'.grad U
527 call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
528 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
529 this%tx_d, this%ty_d, this%tz_d, n_gl)
530 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
531 call device_sub2(fx%x_d, this%temp_d, n)
532
533
534 call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
535 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
536 this%tx_d, this%ty_d, this%tz_d, n_gl)
537 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
538 call device_sub2(fy%x_d, this%temp_d, n)
539
540 call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
541 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
542 this%tx_d, this%ty_d, this%tz_d, n_gl)
543 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
544 call device_sub2(fz%x_d, this%temp_d, n)
545
546 ! U.grad u'
547 call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
548 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
549 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
550 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
551 call device_sub2(fx%x_d, this%temp_d, n)
552
553
554 call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
555 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
556 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
557 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
558 call device_sub2(fy%x_d, this%temp_d, n)
559
560 call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
561 call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
562 this%txb_d, this%tyb_d, this%tzb_d, n_gl)
563 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
564 call device_sub2(fz%x_d, this%temp_d, n)
565
566 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
567 ! Map baseflow to GL
568 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
569 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
570 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
571
572 ! Map perturbed velocity to GL
573 call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
574 call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
575 call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
576
577 ! u'.grad U
578 call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
579 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
580 this%tx, this%ty, this%tz, n_gl)
581 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
582 call sub2(fx%x, this%temp, n)
583
584
585 call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
586 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
587 this%tx, this%ty, this%tz, n_gl)
588 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
589 call sub2(fy%x, this%temp, n)
590
591 call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
592 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
593 this%tx, this%ty, this%tz, n_gl)
594 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
595 call sub2(fz%x, this%temp, n)
596
597 ! U.grad u'
598 call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
599 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
600 this%txb, this%tyb, this%tzb, n_gl)
601 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
602 call sub2(fx%x, this%temp, n)
603
604
605 call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
606 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
607 this%txb, this%tyb, this%tzb, n_gl)
608 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
609 call sub2(fy%x, this%temp, n)
610
611 call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
612 call vdot3(this%tbf, this%vr, this%vs, this%vt, &
613 this%txb, this%tyb, this%tzb, n_gl)
614 call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
615 call sub2(fz%x, this%temp, n)
616 else
617
618 do e = 1, coef%msh%nelv
619 ! Map baseflow to GL
620 call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
621 call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
622 call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
623 ! Map perturbed velocity to GL
624 call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
625 call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
626 call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
627
628 ! u'.grad U
629 call opgrad(vr, vs, vt, txb, c_gl, e, e)
630 do i = 1, this%Xh_GL%lxyz
631 tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
632 end do
633
634 call opgrad(vr, vs, vt, tyb, c_gl, e, e)
635 do i = 1, this%Xh_GL%lxyz
636 tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
637 end do
638
639 call opgrad(vr, vs, vt, tzb, c_gl, e, e)
640 do i = 1, this%Xh_GL%lxyz
641 tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
642 end do
643
644 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
645 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
646 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
647
648 idx = (e-1)*this%Xh_GLL%lxyz+1
649 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
650 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
651 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
652
653 ! U.grad u'
654 call opgrad(vr, vs, vt, tx, c_gl, e, e)
655 do i = 1, this%Xh_GL%lxyz
656 tfx(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
657 end do
658
659 call opgrad(vr, vs, vt, ty, c_gl, e, e)
660 do i = 1, this%Xh_GL%lxyz
661 tfy(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
662 end do
663
664 call opgrad(vr, vs, vt, tz, c_gl, e, e)
665 do i = 1, this%Xh_GL%lxyz
666 tfz(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
667 end do
668
669 call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
670 call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
671 call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
672
673 idx = (e-1)*this%Xh_GLL%lxyz+1
674 call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
675 call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
676 call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
677 end do
678 end if
679 end associate
680 end subroutine compute_linear_advection_dealias
681
696 subroutine compute_adjoint_scalar_advection_dealias(this, vxb, vyb, vzb, s, &
697 fs, Xh, coef, n, dt)
698 class(adv_lin_dealias_t), intent(inout) :: this
699 type(field_t), intent(inout) :: vxb, vyb, vzb
700 type(field_t), intent(inout) :: s
701 type(field_t), intent(inout) :: fs
702 type(space_t), intent(inout) :: Xh
703 type(coef_t), intent(inout) :: coef
704 integer, intent(in) :: n
705 real(kind=rp), intent(in), optional :: dt
706
707 real(kind=rp), dimension(this%Xh_GL%lxyz) :: vx_gl, vy_gl, vz_gl, s_gl
708 real(kind=rp), dimension(this%Xh_GL%lxyz) :: work1, work2, work3
709 real(kind=rp), dimension(this%Xh_GL%lxyz) :: w1, w2, w3
710 real(kind=rp), dimension(this%Xh_GL%lxyz) :: f_gl
711 integer :: e, i, idx, nel, n_GL
712 real(kind=rp), dimension(this%Xh_GLL%lxyz) :: temp
713
714 nel = coef%msh%nelv
715 n_gl = nel * this%Xh_GL%lxyz
716
717 associate(c_gl => this%coef_GL)
718 if (neko_bcknd_device .eq. 1) then
719 ! Map baseflow to GL
720 call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
721 call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
722 call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
723
724 ! Map adjoint scalar to GL (use tx as adjoint scalar array)
725 call this%GLL_to_GL%map(this%tx, s%x, nel, this%Xh_GL)
726
727 ! Outer product (use duxb, duyb, duzb as temporary arrays)
728 call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
729 call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
730 call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
731
732 ! D^T
733 ! vr,vs,vt are temporary arrays
734 call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
735 call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
736 call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
737
738 ! reuse duxb as a temp for summing them
739 call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
740
741 ! map back to GLL
742 call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
743
744 !apply
745 call device_sub2(fs%x_d, this%temp_d, n)
746
747
748 else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
749 call neko_error("Adjoint scalar not implemented for SX")
750 else
751 do e = 1, coef%msh%nelv
752 ! Map baseflow to GL
753 call this%GLL_to_GL%map(vx_gl, vxb%x(1,1,1,e), 1, this%Xh_GL)
754 call this%GLL_to_GL%map(vy_gl, vyb%x(1,1,1,e), 1, this%Xh_GL)
755 call this%GLL_to_GL%map(vz_gl, vzb%x(1,1,1,e), 1, this%Xh_GL)
756
757 ! Map passive scalar velocity to GL
758 call this%GLL_to_GL%map(s_gl, s%x(1,1,1,e), 1, this%Xh_GL)
759
760 do i = 1, this%Xh_GL%lxyz
761 work1(i) = s_gl(i)*vx_gl(i)
762 work2(i) = s_gl(i)*vy_gl(i)
763 work3(i) = s_gl(i)*vz_gl(i)
764 end do
765
766 ! D^T
767 call cdtp(w1, work1, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
768 call cdtp(w2, work2, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
769 call cdtp(w3, work3, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
770
771 ! sum them
772 do i = 1, this%Xh_GL%lxyz
773 f_gl(i) = w1(i) + w2(i) + w3(i)
774 end do
775
776 ! map back to GLL
777 idx = (e-1)*this%Xh_GLL%lxyz+1
778 call this%GLL_to_GL%map(temp, f_gl, 1, this%Xh_GLL)
779 call sub2(fs%x(idx, 1, 1, 1), temp, this%Xh_GLL%lxyz)
780
781 end do
782
783 end if
784 end associate
785
786 end subroutine compute_adjoint_scalar_advection_dealias
787end module adv_lin_dealias
Subroutines to add advection terms to the RHS of a transport equation.
Subroutines to add advection terms to the RHS of a transport equation.
Type encapsulating advection routines with dealiasing.
Base abstract type for computing the advection operator.