! Listing extracted from d6/d83/adv__adjoint__dealias_8f90_source.html

! Copyright (c) 2021-2024, The Neko Authors

! All rights reserved.

!

! Redistribution and use in source and binary forms, with or without

! modification, are permitted provided that the following conditions

! are met:

!

!   * Redistributions of source code must retain the above copyright

!     notice, this list of conditions and the following disclaimer.

!

!   * Redistributions in binary form must reproduce the above

!     copyright notice, this list of conditions and the following

!     disclaimer in the documentation and/or other materials provided

!     with the distribution.

!

!   * Neither the name of the authors nor the names of its

!     contributors may be used to endorse or promote products derived

!     from this software without specific prior written permission.

!

! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

! POSSIBILITY OF SUCH DAMAGE.

!

module adv_lin_dealias

  use advection_adjoint, only: advection_adjoint_t
  use num_types, only: rp
  use math, only: vdot3, sub2
  use space, only: space_t, gl
  use field, only: field_t
  use coefs, only: coef_t
  use neko_config, only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
       neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
  use operators, only: opgrad, cdtp
  use interpolation, only: interpolator_t
  use device_math, only: device_vdot3, device_sub2, device_col3, device_add4
  use device, only: device_map, device_free
  use utils, only: neko_error
  use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, c_associated

  implicit none

  private


  !> Dealiased evaluation of the linearized and adjoint advection terms.
  !!
  !! The type owns a second function space (`Xh_GL`, created in `init` with
  !! the `gl` flag), the coefficients on that space, and an interpolator
  !! between it and the space of the coefficients handed to `init`. The
  !! remaining components are work arrays, each paired with a device handle,
  !! that `init` allocates and maps only for the device/SX/XSMM backends.
  type, public, extends(advection_adjoint_t) :: adv_lin_dealias_t
     !> Coefficients on the dealiased space `Xh_GL` (owned).
     type(coef_t) :: coef_gl
     !> Coefficients passed to `init` (borrowed, not owned).
     type(coef_t), pointer :: coef_gll => null()
     !> Interpolator between `Xh_GLL` and `Xh_GL`.
     type(interpolator_t) :: gll_to_gl
     !> Dealiased function space (owned).
     type(space_t) :: xh_gl
     !> Function space of `coef_gll` (borrowed, not owned).
     type(space_t), pointer :: xh_gll => null()

     !> Scratch on the original space (`temp`) and on the dealiased space.
     real(kind=rp), allocatable :: temp(:), tbf(:)
     !> Advected/adjoint velocity (or adjoint scalar in `tx`) on `Xh_GL`.
     real(kind=rp), allocatable :: tx(:), ty(:), tz(:)
     !> General purpose work arrays on `Xh_GL`.
     real(kind=rp), allocatable :: vr(:), vs(:), vt(:)
     type(c_ptr) :: temp_d = c_null_ptr
     type(c_ptr) :: tbf_d = c_null_ptr
     type(c_ptr) :: tx_d = c_null_ptr
     type(c_ptr) :: ty_d = c_null_ptr
     type(c_ptr) :: tz_d = c_null_ptr
     type(c_ptr) :: vr_d = c_null_ptr
     type(c_ptr) :: vs_d = c_null_ptr
     type(c_ptr) :: vt_d = c_null_ptr

     !> Base flow on `Xh_GL`.
     real(kind=rp), allocatable :: txb(:), tyb(:), tzb(:)
     type(c_ptr) :: txb_d = c_null_ptr
     type(c_ptr) :: tyb_d = c_null_ptr
     type(c_ptr) :: tzb_d = c_null_ptr

     !> Gradients of the base flow on `Xh_GL`; also reused as temporaries.
     real(kind=rp), allocatable :: duxb(:), duyb(:), duzb(:)
     real(kind=rp), allocatable :: dvxb(:), dvyb(:), dvzb(:)
     real(kind=rp), allocatable :: dwxb(:), dwyb(:), dwzb(:)
     type(c_ptr) :: duxb_d = c_null_ptr
     type(c_ptr) :: duyb_d = c_null_ptr
     type(c_ptr) :: duzb_d = c_null_ptr
     type(c_ptr) :: dvxb_d = c_null_ptr
     type(c_ptr) :: dvyb_d = c_null_ptr
     type(c_ptr) :: dvzb_d = c_null_ptr
     type(c_ptr) :: dwxb_d = c_null_ptr
     type(c_ptr) :: dwyb_d = c_null_ptr
     type(c_ptr) :: dwzb_d = c_null_ptr

   contains
     !> Linearized advection term, subtracted from the forcing fields.
     procedure, pass(this) :: compute_linear => &
          compute_linear_advection_dealias
     !> Adjoint advection term, subtracted from the forcing fields.
     procedure, pass(this) :: compute_adjoint => &
          compute_adjoint_advection_dealias
     ! If one integrates by parts, this essentially switches sign and adds
     ! some boundary terms.
     ! We keep the differential operator on the test function.
     !> Adjoint advection term for a scalar, subtracted from the forcing.
     procedure, pass(this) :: compute_adjoint_scalar => &
          compute_adjoint_scalar_advection_dealias
     ! NOTE
     ! This linearized advection term is the same as a normal advection term
     ! so not sure what to do here...
     !> Set up spaces, coefficients, interpolator and work buffers.
     procedure, pass(this) :: init => init_dealias
     !> Release everything set up by `init`.
     procedure, pass(this) :: free => free_dealias
  end type adv_lin_dealias_t


contains


  !> Set up the dealiased space, its coefficients and the work buffers.
  !!
  !! The metric terms `drdx ... dtdz` of `coef` are interpolated onto the
  !! dealiased space and stored in `this%coef_GL`. Work arrays of length
  !! `nelv * Xh_GL%lxyz` are allocated for the device, SX and XSMM backends
  !! and, on device backends, mapped to device memory.
  !!
  !! @param lxd  Number of points per direction of the dealiased space.
  !! @param coef Coefficients of the original space. Pointers to `coef` and
  !!             `coef%Xh` are stored, so it must outlive this object.
  subroutine init_dealias(this, lxd, coef)
    class(adv_lin_dealias_t), target, intent(inout) :: this
    integer, intent(in) :: lxd
    type(coef_t), intent(inout), target :: coef
    integer :: nel, n_GL

    call this%Xh_GL%init(gl, lxd, lxd, lxd)
    this%Xh_GLL => coef%Xh
    this%coef_GLL => coef
    call this%GLL_to_GL%init(this%Xh_GL, this%Xh_GLL)

    call this%coef_GL%init(this%Xh_GL, coef%msh)

    nel = coef%msh%nelv
    n_GL = nel*this%Xh_GL%lxyz

    ! Interpolate the metric terms onto the dealiased space
    call this%GLL_to_GL%map(this%coef_GL%drdx, coef%drdx, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dsdx, coef%dsdx, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dtdx, coef%dtdx, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%drdy, coef%drdy, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dsdy, coef%dsdy, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dtdy, coef%dtdy, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%drdz, coef%drdz, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dsdz, coef%dsdz, nel, this%Xh_GL)
    call this%GLL_to_GL%map(this%coef_GL%dtdz, coef%dtdz, nel, this%Xh_GL)

    ! The whole-mesh buffers are needed by every path that is not the
    ! element-by-element CPU path. The generic device flag is part of the
    ! condition so that the buffers always exist before they are mapped below.
    if ((neko_bcknd_device .eq. 1) .or. (neko_bcknd_hip .eq. 1) .or. &
         (neko_bcknd_cuda .eq. 1) .or. (neko_bcknd_opencl .eq. 1) .or. &
         (neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
       allocate(this%temp(n_GL))
       allocate(this%tbf(n_GL))
       allocate(this%tx(n_GL))
       allocate(this%ty(n_GL))
       allocate(this%tz(n_GL))
       allocate(this%vr(n_GL))
       allocate(this%vs(n_GL))
       allocate(this%vt(n_GL))
       allocate(this%duxb(n_GL))
       allocate(this%duyb(n_GL))
       allocate(this%duzb(n_GL))
       allocate(this%dvxb(n_GL))
       allocate(this%dvyb(n_GL))
       allocate(this%dvzb(n_GL))
       allocate(this%dwxb(n_GL))
       allocate(this%dwyb(n_GL))
       allocate(this%dwzb(n_GL))
       allocate(this%txb(n_GL))
       allocate(this%tyb(n_GL))
       allocate(this%tzb(n_GL))
    end if

    if (neko_bcknd_device .eq. 1) then
       call device_map(this%temp, this%temp_d, n_GL)
       call device_map(this%tbf, this%tbf_d, n_GL)
       call device_map(this%tx, this%tx_d, n_GL)
       call device_map(this%ty, this%ty_d, n_GL)
       call device_map(this%tz, this%tz_d, n_GL)
       call device_map(this%vr, this%vr_d, n_GL)
       call device_map(this%vs, this%vs_d, n_GL)
       call device_map(this%vt, this%vt_d, n_GL)
       call device_map(this%duxb, this%duxb_d, n_GL)
       call device_map(this%duyb, this%duyb_d, n_GL)
       call device_map(this%duzb, this%duzb_d, n_GL)
       call device_map(this%dvxb, this%dvxb_d, n_GL)
       call device_map(this%dvyb, this%dvyb_d, n_GL)
       call device_map(this%dvzb, this%dvzb_d, n_GL)
       call device_map(this%dwxb, this%dwxb_d, n_GL)
       call device_map(this%dwyb, this%dwyb_d, n_GL)
       call device_map(this%dwzb, this%dwzb_d, n_GL)
       call device_map(this%txb, this%txb_d, n_GL)
       call device_map(this%tyb, this%tyb_d, n_GL)
       call device_map(this%tzb, this%tzb_d, n_GL)
    end if

  end subroutine init_dealias


  !> Release everything that `init` set up.
  !!
  !! Device buffers are freed first, then the host work arrays, then the
  !! owned sub-objects. The pointers to the caller's coefficients and space
  !! are only nullified, since this object does not own their targets.
  !! Safe to call on an object that was never initialised.
  subroutine free_dealias(this)
    class(adv_lin_dealias_t), intent(inout) :: this

    ! NOTE(review): device_map also registers a host/device association;
    ! confirm whether an explicit deassociation is needed before freeing.
    call release_device(this%temp_d)
    call release_device(this%tbf_d)
    call release_device(this%tx_d)
    call release_device(this%ty_d)
    call release_device(this%tz_d)
    call release_device(this%vr_d)
    call release_device(this%vs_d)
    call release_device(this%vt_d)
    call release_device(this%txb_d)
    call release_device(this%tyb_d)
    call release_device(this%tzb_d)
    call release_device(this%duxb_d)
    call release_device(this%duyb_d)
    call release_device(this%duzb_d)
    call release_device(this%dvxb_d)
    call release_device(this%dvyb_d)
    call release_device(this%dvzb_d)
    call release_device(this%dwxb_d)
    call release_device(this%dwyb_d)
    call release_device(this%dwzb_d)

    if (allocated(this%temp)) deallocate(this%temp)
    if (allocated(this%tbf)) deallocate(this%tbf)
    if (allocated(this%tx)) deallocate(this%tx)
    if (allocated(this%ty)) deallocate(this%ty)
    if (allocated(this%tz)) deallocate(this%tz)
    if (allocated(this%vr)) deallocate(this%vr)
    if (allocated(this%vs)) deallocate(this%vs)
    if (allocated(this%vt)) deallocate(this%vt)
    if (allocated(this%txb)) deallocate(this%txb)
    if (allocated(this%tyb)) deallocate(this%tyb)
    if (allocated(this%tzb)) deallocate(this%tzb)
    if (allocated(this%duxb)) deallocate(this%duxb)
    if (allocated(this%duyb)) deallocate(this%duyb)
    if (allocated(this%duzb)) deallocate(this%duzb)
    if (allocated(this%dvxb)) deallocate(this%dvxb)
    if (allocated(this%dvyb)) deallocate(this%dvyb)
    if (allocated(this%dvzb)) deallocate(this%dvzb)
    if (allocated(this%dwxb)) deallocate(this%dwxb)
    if (allocated(this%dwyb)) deallocate(this%dwyb)
    if (allocated(this%dwzb)) deallocate(this%dwzb)

    ! Owned sub-objects
    call this%GLL_to_GL%free()
    call this%coef_GL%free()
    call this%Xh_GL%free()

    ! Borrowed targets: drop the reference only
    nullify(this%coef_GLL)
    nullify(this%Xh_GLL)

  contains

    !> Free one device buffer if it is in use and reset its handle.
    subroutine release_device(ptr)
      type(c_ptr), intent(inout) :: ptr

      if (c_associated(ptr)) call device_free(ptr)
      ptr = c_null_ptr
    end subroutine release_device

  end subroutine free_dealias


  !> Subtract the dealiased adjoint advection terms from the forcing.
  !!
  !! All products are formed on the dealiased space and interpolated back
  !! before being subtracted from `fx`, `fy`, `fz`. Two contributions are
  !! subtracted:
  !!  - `u . grad U_b^T`: the adjoint velocity contracted with the transposed
  !!    base-flow gradient;
  !!  - `\int \grad v . U_b ^ u` (with `^` an outer product): `cdtp` applied
  !!    to the products of one adjoint velocity component with each base-flow
  !!    component, summed over the three directions.
  !!
  !! The SX and XSMM backends are not implemented and abort with an error.
  !!
  !! @param vx, vy, vz     Adjoint velocity.
  !! @param vxb, vyb, vzb  Base flow (added by HARRY).
  !! @param fx, fy, fz     Forcing fields, updated in place.
  !! @param Xh             Not used by this routine.
  !! @param coef           Only `coef%msh%nelv` is read.
  !! @param n              Number of entries updated on the device path.
  subroutine compute_adjoint_advection_dealias(this, vx, vy, vz, vxb, vyb, &
       vzb, fx, fy, fz, Xh, coef, n)
    implicit none
    class(adv_lin_dealias_t), intent(inout) :: this
    type(space_t), intent(inout) :: Xh
    type(coef_t), intent(inout) :: coef
    type(field_t), intent(inout) :: vx, vy, vz
    type(field_t), intent(inout) :: vxb, vyb, vzb
    type(field_t), intent(inout) :: fx, fy, fz
    integer, intent(in) :: n
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz

    ! u and U_b on dealiased mesh (one element)
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb

    ! gradients of U_b on dealiased mesh (one element)
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: duxb, dvxb, dwxb
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: duyb, dvyb, dwyb
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: duzb, dvzb, dwzb

    real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
    integer :: e, nel, n_GL

    nel = coef%msh%nelv
    n_GL = nel * this%Xh_GL%lxyz

    associate(c_gl => this%coef_GL)

      if (neko_bcknd_device .eq. 1) then
         ! Map baseflow to GL
         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)

         ! Map adjoint velocity to GL
         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)

         ! u . grad U_b^T
         !-----------------------------
         ! take all the gradients
         call opgrad(this%duxb, this%duyb, this%duzb, this%txb, c_gl)
         call opgrad(this%dvxb, this%dvyb, this%dvzb, this%tyb, c_gl)
         call opgrad(this%dwxb, this%dwyb, this%dwzb, this%tzb, c_gl)

         ! transpose and multiply
         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
              this%duxb_d, this%dvxb_d, this%dwxb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
         call device_sub2(fx%x_d, this%temp_d, n)

         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
              this%duyb_d, this%dvyb_d, this%dwyb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
         call device_sub2(fy%x_d, this%temp_d, n)

         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
              this%duzb_d, this%dvzb_d, this%dwzb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
         call device_sub2(fz%x_d, this%temp_d, n)

         ! \int \grad v . U_b ^ u    with ^ an outer product

         ! (x)
         ! duxb,duyb,duzb are temporary arrays
         call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_GL)
         call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_GL)
         call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_GL)

         ! D^T
         ! vr,vs,vt are temporary arrays
         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)

         ! reuse duxb as a temp
         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
         call device_sub2(fx%x_d, this%temp_d, n)

         ! (y)
         ! duxb,duyb,duzb are temporary arrays
         call device_col3(this%duxb_d, this%ty_d, this%txb_d, n_GL)
         call device_col3(this%duyb_d, this%ty_d, this%tyb_d, n_GL)
         call device_col3(this%duzb_d, this%ty_d, this%tzb_d, n_GL)

         ! D^T
         ! vr,vs,vt are temporary arrays
         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)

         ! reuse duxb as a temp
         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
         call device_sub2(fy%x_d, this%temp_d, n)

         ! (z)
         ! duxb,duyb,duzb are temporary arrays
         call device_col3(this%duxb_d, this%tz_d, this%txb_d, n_GL)
         call device_col3(this%duyb_d, this%tz_d, this%tyb_d, n_GL)
         call device_col3(this%duzb_d, this%tz_d, this%tzb_d, n_GL)

         ! D^T
         ! vr,vs,vt are temporary arrays
         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)

         ! reuse duxb as a temp
         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
         call device_sub2(fz%x_d, this%temp_d, n)

      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
         ! Abort instead of silently returning with the adjoint term missing
         call neko_error("Adjoint advection not implemented for SX/XSMM")
      else

         do e = 1, nel
            ! Map baseflow to GL
            call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)

            ! Map adjoint velocity to GL
            call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)

            ! u . grad U_b^T
            !-----------------------------
            call opgrad(duxb, duyb, duzb, txb, c_gl, e, e)
            call opgrad(dvxb, dvyb, dvzb, tyb, c_gl, e, e)
            call opgrad(dwxb, dwyb, dwzb, tzb, c_gl, e, e)

            ! transpose and multiply
            tfx = tx*duxb + ty*dvxb + tz*dwxb
            tfy = tx*duyb + ty*dvyb + tz*dwyb
            tfz = tx*duzb + ty*dvzb + tz*dwzb

            ! map back to GLL
            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)

            ! accumulate; element e starts at (1,1,1,e), which keeps every
            ! subscript within its declared bounds
            call sub2(fx%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)
            call sub2(fy%x(1,1,1,e), tempy, this%Xh_GLL%lxyz)
            call sub2(fz%x(1,1,1,e), tempz, this%Xh_GLL%lxyz)

            ! (x)
            duxb = tx*txb
            duyb = tx*tyb
            duzb = tx*tzb

            ! D^T
            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)

            ! sum them
            tfx = tfx + tfy + tfz

            ! map back to GLL
            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call sub2(fx%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)

            ! (y)
            duxb = ty*txb
            duyb = ty*tyb
            duzb = ty*tzb

            ! D^T
            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)

            ! sum them
            tfx = tfx + tfy + tfz

            ! map back to GLL
            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call sub2(fy%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)

            ! (z)
            duxb = tz*txb
            duyb = tz*tyb
            duzb = tz*tzb

            ! D^T
            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)

            ! sum them
            tfx = tfx + tfy + tfz

            ! map back to GLL
            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call sub2(fz%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)

         end do

      end if
    end associate

  end subroutine compute_adjoint_advection_dealias


  !> Subtract the dealiased linearized advection terms from the forcing.
  !!
  !! Both `u'.grad U` and `U.grad u'` are formed on the dealiased space,
  !! interpolated back and subtracted from `fx`, `fy`, `fz`. The device and
  !! SX/XSMM paths work on the whole mesh using the buffers stored in `this`;
  !! the default CPU path works element by element with local arrays.
  !!
  !! @param vx, vy, vz     Perturbation velocity `u'`.
  !! @param vxb, vyb, vzb  Base flow `U`.
  !! @param fx, fy, fz     Forcing fields, updated in place.
  !! @param Xh             Not used by this routine.
  !! @param coef           Only `coef%msh%nelv` is read.
  !! @param n              Number of entries updated on the whole-mesh paths.
  subroutine compute_linear_advection_dealias(this, vx, vy, vz, vxb, vyb, vzb, &
       fx, fy, fz, Xh, coef, n)
    implicit none
    class(adv_lin_dealias_t), intent(inout) :: this
    type(space_t), intent(inout) :: Xh
    type(coef_t), intent(inout) :: coef
    type(field_t), intent(inout) :: vx, vy, vz
    type(field_t), intent(inout) :: vxb, vyb, vzb
    integer, intent(in) :: n
    type(field_t), intent(inout) :: fx, fy, fz
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: vr, vs, vt
    real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz

    integer :: e, nel, n_GL

    nel = coef%msh%nelv
    n_GL = nel * this%Xh_GL%lxyz

    !This is extremely primitive and unoptimized  on the device //Karp
    associate(c_gl => this%coef_GL)

      if (neko_bcknd_device .eq. 1) then
         ! Map baseflow to GL
         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)

         ! Map perturbed velocity to GL
         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)

         ! u'.grad U
         call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%tx_d, this%ty_d, this%tz_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fx%x_d, this%temp_d, n)

         call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%tx_d, this%ty_d, this%tz_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fy%x_d, this%temp_d, n)

         call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%tx_d, this%ty_d, this%tz_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fz%x_d, this%temp_d, n)

         ! U.grad u'
         call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%txb_d, this%tyb_d, this%tzb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fx%x_d, this%temp_d, n)

         call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%txb_d, this%tyb_d, this%tzb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fy%x_d, this%temp_d, n)

         call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
              this%txb_d, this%tyb_d, this%tzb_d, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call device_sub2(fz%x_d, this%temp_d, n)

      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
         ! Map baseflow to GL
         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)

         ! Map perturbed velocity to GL
         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)

         ! u'.grad U
         call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%tx, this%ty, this%tz, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fx%x, this%temp, n)

         call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%tx, this%ty, this%tz, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fy%x, this%temp, n)

         call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%tx, this%ty, this%tz, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fz%x, this%temp, n)

         ! U.grad u'
         call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%txb, this%tyb, this%tzb, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fx%x, this%temp, n)

         call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%txb, this%tyb, this%tzb, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fy%x, this%temp, n)

         call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
              this%txb, this%tyb, this%tzb, n_GL)
         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
         call sub2(fz%x, this%temp, n)
      else

         do e = 1, nel
            ! Map baseflow to GL
            call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
            ! Map perturbed velocity to GL
            call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)

            ! u'.grad U
            call opgrad(vr, vs, vt, txb, c_gl, e, e)
            tfx = tx*vr + ty*vs + tz*vt

            call opgrad(vr, vs, vt, tyb, c_gl, e, e)
            tfy = tx*vr + ty*vs + tz*vt

            call opgrad(vr, vs, vt, tzb, c_gl, e, e)
            tfz = tx*vr + ty*vs + tz*vt

            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)

            ! Element e starts at (1,1,1,e), which keeps every subscript
            ! within its declared bounds
            call sub2(fx%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)
            call sub2(fy%x(1,1,1,e), tempy, this%Xh_GLL%lxyz)
            call sub2(fz%x(1,1,1,e), tempz, this%Xh_GLL%lxyz)

            ! U.grad u'
            call opgrad(vr, vs, vt, tx, c_gl, e, e)
            tfx = txb*vr + tyb*vs + tzb*vt

            call opgrad(vr, vs, vt, ty, c_gl, e, e)
            tfy = txb*vr + tyb*vs + tzb*vt

            call opgrad(vr, vs, vt, tz, c_gl, e, e)
            tfz = txb*vr + tyb*vs + tzb*vt

            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)

            call sub2(fx%x(1,1,1,e), tempx, this%Xh_GLL%lxyz)
            call sub2(fy%x(1,1,1,e), tempy, this%Xh_GLL%lxyz)
            call sub2(fz%x(1,1,1,e), tempz, this%Xh_GLL%lxyz)
         end do
      end if
    end associate
  end subroutine compute_linear_advection_dealias


  !> Subtract the dealiased adjoint scalar advection term from the forcing.
  !!
  !! On the dealiased space the adjoint scalar is multiplied by each
  !! base-flow component, `cdtp` is applied to the three products, and their
  !! sum is interpolated back and subtracted from `fs`.
  !!
  !! The SX and XSMM backends are not implemented and abort with an error.
  !!
  !! @param vxb, vyb, vzb  Base flow.
  !! @param s              Adjoint scalar.
  !! @param fs             Forcing field, updated in place.
  !! @param Xh             Not used by this routine.
  !! @param coef           Only `coef%msh%nelv` is read.
  !! @param n              Number of entries updated on the device path.
  !! @param dt             Optional; not used by this routine.
  subroutine compute_adjoint_scalar_advection_dealias(this, vxb, vyb, vzb, s, &
       fs, Xh, coef, n, dt)
    class(adv_lin_dealias_t), intent(inout) :: this
    type(field_t), intent(inout) :: vxb, vyb, vzb
    type(field_t), intent(inout) :: s
    type(field_t), intent(inout) :: fs
    type(space_t), intent(inout) :: Xh
    type(coef_t), intent(inout) :: coef
    integer, intent(in) :: n
    real(kind=rp), intent(in), optional :: dt

    real(kind=rp), dimension(this%Xh_GL%lxyz) :: vx_gl, vy_gl, vz_gl, s_gl
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: work1, work2, work3
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: w1, w2, w3
    real(kind=rp), dimension(this%Xh_GL%lxyz) :: f_gl
    integer :: e, nel, n_GL
    real(kind=rp), dimension(this%Xh_GLL%lxyz) :: temp

    nel = coef%msh%nelv
    n_GL = nel * this%Xh_GL%lxyz

    associate(c_gl => this%coef_GL)
      if (neko_bcknd_device .eq. 1) then
         ! Map baseflow to GL
         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)

         ! Map adjoint scalar to GL (use tx as adjoint scalar array)
         call this%GLL_to_GL%map(this%tx, s%x, nel, this%Xh_GL)

         ! Outer product (use duxb, duyb, duzb as temporary arrays)
         call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_GL)
         call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_GL)
         call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_GL)

         ! D^T
         ! vr,vs,vt are temporary arrays
         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)

         ! reuse duxb as a temp for summing them
         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_GL)

         ! map back to GLL
         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)

         ! apply
         call device_sub2(fs%x_d, this%temp_d, n)

      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) then
         ! This branch covers both backends, so the message names both
         call neko_error("Adjoint scalar not implemented for SX/XSMM")
      else
         do e = 1, nel
            ! Map baseflow to GL
            call this%GLL_to_GL%map(vx_gl, vxb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(vy_gl, vyb%x(1,1,1,e), 1, this%Xh_GL)
            call this%GLL_to_GL%map(vz_gl, vzb%x(1,1,1,e), 1, this%Xh_GL)

            ! Map adjoint scalar to GL
            call this%GLL_to_GL%map(s_gl, s%x(1,1,1,e), 1, this%Xh_GL)

            ! Products of the scalar with each base-flow component
            work1 = s_gl*vx_gl
            work2 = s_gl*vy_gl
            work3 = s_gl*vz_gl

            ! D^T
            call cdtp(w1, work1, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
            call cdtp(w2, work2, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
            call cdtp(w3, work3, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)

            ! sum them
            f_gl = w1 + w2 + w3

            ! map back to GLL and accumulate; element e starts at (1,1,1,e),
            ! which keeps every subscript within its declared bounds
            call this%GLL_to_GL%map(temp, f_gl, 1, this%Xh_GLL)
            call sub2(fs%x(1,1,1,e), temp, this%Xh_GLL%lxyz)

         end do

      end if
    end associate

  end subroutine compute_adjoint_scalar_advection_dealias

end module adv_lin_dealias

! Cross-references from the generated documentation listing:
!
! adv_lin_dealias
!   Subroutines to add advection terms to the RHS of a transport equation.
!   Definition: adv_adjoint_dealias.f90:34
!
! advection_adjoint
!   Subroutines to add advection terms to the RHS of a transport equation.
!   Definition: advection_adjoint.f90:34
!
! adv_lin_dealias::adv_lin_dealias_t
!   Type encapsulating advection routines with dealiasing.
!   Definition: adv_adjoint_dealias.f90:52
!
! advection_adjoint::advection_adjoint_t
!   Base abstract type for computing the advection operator.
!   Definition: advection_adjoint.f90:43