36  use num_types, 
only: rp
 
   37  use math, 
only: vdot3, sub2
 
   38  use space, 
only: space_t, gl
 
   39  use field, 
only: field_t
 
   40  use coefs, 
only: coef_t
 
   41  use neko_config, 
only: neko_bcknd_device, neko_bcknd_sx, neko_bcknd_xsmm, &
 
   42       neko_bcknd_opencl, neko_bcknd_cuda, neko_bcknd_hip
 
   43  use operators, 
only: opgrad, cdtp
 
   44  use interpolation, 
only: interpolator_t
 
   45  use device_math, 
only: device_vdot3, device_sub2, device_col3, device_add4
 
   46  use device, 
only: device_map
 
   47  use utils, 
only: neko_error
 
   48  use, 
intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr
 
   55     type(coef_t) :: coef_gl
 
   57     type(coef_t), 
pointer :: coef_gll
 
   59     type(interpolator_t) :: gll_to_gl
 
   61     type(space_t) :: xh_gl
 
   63     type(space_t), 
pointer :: xh_gll
 
   64     real(kind=rp), 
allocatable :: temp(:), tbf(:)
 
   66     real(kind=rp), 
allocatable :: tx(:), ty(:), tz(:)
 
   67     real(kind=rp), 
allocatable :: vr(:), vs(:), vt(:)
 
   69     type(c_ptr) :: temp_d = c_null_ptr
 
   71     type(c_ptr) :: tbf_d = c_null_ptr
 
   73     type(c_ptr) :: tx_d = c_null_ptr
 
   75     type(c_ptr) :: ty_d = c_null_ptr
 
   77     type(c_ptr) :: tz_d = c_null_ptr
 
   79     type(c_ptr) :: vr_d = c_null_ptr
 
   81     type(c_ptr) :: vs_d = c_null_ptr
 
   83     type(c_ptr) :: vt_d = c_null_ptr
 
   85     real(kind=rp), 
allocatable :: txb(:), tyb(:), tzb(:)
 
   87     type(c_ptr) :: txb_d = c_null_ptr
 
   89     type(c_ptr) :: tyb_d = c_null_ptr
 
   91     type(c_ptr) :: tzb_d = c_null_ptr
 
   93     real(kind=rp), 
allocatable :: duxb(:), duyb(:), duzb(:)
 
   94     real(kind=rp), 
allocatable :: dvxb(:), dvyb(:), dvzb(:)
 
   95     real(kind=rp), 
allocatable :: dwxb(:), dwyb(:), dwzb(:)
 
   97     type(c_ptr) :: duxb_d = c_null_ptr
 
   99     type(c_ptr) :: duyb_d = c_null_ptr
 
  101     type(c_ptr) :: duzb_d = c_null_ptr
 
  103     type(c_ptr) :: dvxb_d = c_null_ptr
 
  105     type(c_ptr) :: dvyb_d = c_null_ptr
 
  107     type(c_ptr) :: dvzb_d = c_null_ptr
 
  109     type(c_ptr) :: dwxb_d = c_null_ptr
 
  111     type(c_ptr) :: dwyb_d = c_null_ptr
 
  113     type(c_ptr) :: dwzb_d = c_null_ptr
 
  119     procedure, pass(this) :: compute_linear => &
 
  120          compute_linear_advection_dealias
 
  126     procedure, pass(this) :: compute_adjoint => &
 
  127          compute_adjoint_advection_dealias
 
  132     procedure, pass(this) :: compute_adjoint_scalar => &
 
  133          compute_adjoint_scalar_advection_dealias
 
  138     procedure, pass(this) :: init => init_dealias
 
  140     procedure, pass(this) :: free => free_dealias
 
 
  149  subroutine init_dealias(this, lxd, coef)
 
  151    integer, 
intent(in) :: lxd
 
  152    type(coef_t), 
intent(inout), 
target :: coef
 
  153    integer :: nel, n_GL, n
 
  155    call this%Xh_GL%init(gl, lxd, lxd, lxd)
 
  156    this%Xh_GLL => coef%Xh
 
  157    this%coef_GLL => coef
 
  158    call this%GLL_to_GL%init(this%Xh_GL, this%Xh_GLL)
 
  160    call this%coef_GL%init(this%Xh_GL, coef%msh)
 
  163    n_gl = nel*this%Xh_GL%lxyz
 
  165    call this%GLL_to_GL%map(this%coef_GL%drdx, coef%drdx, nel, this%Xh_GL)
 
  166    call this%GLL_to_GL%map(this%coef_GL%dsdx, coef%dsdx, nel, this%Xh_GL)
 
  167    call this%GLL_to_GL%map(this%coef_GL%dtdx, coef%dtdx, nel, this%Xh_GL)
 
  168    call this%GLL_to_GL%map(this%coef_GL%drdy, coef%drdy, nel, this%Xh_GL)
 
  169    call this%GLL_to_GL%map(this%coef_GL%dsdy, coef%dsdy, nel, this%Xh_GL)
 
  170    call this%GLL_to_GL%map(this%coef_GL%dtdy, coef%dtdy, nel, this%Xh_GL)
 
  171    call this%GLL_to_GL%map(this%coef_GL%drdz, coef%drdz, nel, this%Xh_GL)
 
  172    call this%GLL_to_GL%map(this%coef_GL%dsdz, coef%dsdz, nel, this%Xh_GL)
 
  173    call this%GLL_to_GL%map(this%coef_GL%dtdz, coef%dtdz, nel, this%Xh_GL)
 
  175    if ((neko_bcknd_hip .eq. 1) .or. (neko_bcknd_cuda .eq. 1) .or. &
 
  176         (neko_bcknd_opencl .eq. 1) .or. (neko_bcknd_sx .eq. 1) .or. &
 
  177         (neko_bcknd_xsmm .eq. 1)) 
then 
  178       allocate(this%temp(n_gl))
 
  179       allocate(this%tbf(n_gl))
 
  180       allocate(this%tx(n_gl))
 
  181       allocate(this%ty(n_gl))
 
  182       allocate(this%tz(n_gl))
 
  183       allocate(this%vr(n_gl))
 
  184       allocate(this%vs(n_gl))
 
  185       allocate(this%vt(n_gl))
 
  186       allocate(this%duxb(n_gl))
 
  187       allocate(this%duyb(n_gl))
 
  188       allocate(this%duzb(n_gl))
 
  189       allocate(this%dvxb(n_gl))
 
  190       allocate(this%dvyb(n_gl))
 
  191       allocate(this%dvzb(n_gl))
 
  192       allocate(this%dwxb(n_gl))
 
  193       allocate(this%dwyb(n_gl))
 
  194       allocate(this%dwzb(n_gl))
 
  195       allocate(this%txb(n_gl))
 
  196       allocate(this%tyb(n_gl))
 
  197       allocate(this%tzb(n_gl))
 
  200    if (neko_bcknd_device .eq. 1) 
then 
  201       call device_map(this%temp, this%temp_d, n_gl)
 
  202       call device_map(this%tbf, this%tbf_d, n_gl)
 
  203       call device_map(this%tx, this%tx_d, n_gl)
 
  204       call device_map(this%ty, this%ty_d, n_gl)
 
  205       call device_map(this%tz, this%tz_d, n_gl)
 
  206       call device_map(this%vr, this%vr_d, n_gl)
 
  207       call device_map(this%vs, this%vs_d, n_gl)
 
  208       call device_map(this%vt, this%vt_d, n_gl)
 
  209       call device_map(this%duxb, this%duxb_d, n_gl)
 
  210       call device_map(this%duyb, this%duyb_d, n_gl)
 
  211       call device_map(this%duzb, this%duzb_d, n_gl)
 
  212       call device_map(this%dvxb, this%dvxb_d, n_gl)
 
  213       call device_map(this%dvyb, this%dvyb_d, n_gl)
 
  214       call device_map(this%dvzb, this%dvzb_d, n_gl)
 
  215       call device_map(this%dwxb, this%dwxb_d, n_gl)
 
  216       call device_map(this%dwyb, this%dwyb_d, n_gl)
 
  217       call device_map(this%dwzb, this%dwzb_d, n_gl)
 
  218       call device_map(this%txb, this%txb_d, n_gl)
 
  219       call device_map(this%tyb, this%tyb_d, n_gl)
 
  220       call device_map(this%tzb, this%tzb_d, n_gl)
 
 
  223  end subroutine init_dealias
 
  226  subroutine free_dealias(this)
 
  228  end subroutine free_dealias
 
  248  subroutine compute_adjoint_advection_dealias(this, vx, vy, vz, vxb, vyb, &
 
  249       vzb, fx, fy, fz, Xh, coef, n)
 
  253    type(space_t), 
intent(inout) :: Xh
 
  254    type(coef_t), 
intent(inout) :: coef
 
  255    type(field_t), 
intent(inout) :: vx, vy, vz
 
  256    type(field_t), 
intent(inout) :: vxb, vyb, vzb
 
  257    type(field_t), 
intent(inout) :: fx, fy, fz
 
  258    integer, 
intent(in) :: n
 
  259    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
 
  262    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: tx, ty, tz
 
  263    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
 
  266    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: duxb, dvxb, dwxb
 
  267    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: duyb, dvyb, dwyb
 
  268    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: duzb, dvzb, dwzb
 
  270    real(kind=rp), 
dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
 
  271    integer :: e, i, idx, nel, n_GL
 
  274    n_gl = nel * this%Xh_GL%lxyz
 
  275    associate(c_gl => this%coef_GL)
 
  277      if (neko_bcknd_device .eq. 1) 
then 
  279         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
 
  280         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
 
  281         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
 
  284         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
 
  285         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
 
  286         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
 
  292         call opgrad(this%duxb, this%duyb, this%duzb, this%txb, c_gl)
 
  293         call opgrad(this%dvxb, this%dvyb, this%dvzb, this%tyb, c_gl)
 
  294         call opgrad(this%dwxb, this%dwyb, this%dwzb, this%tzb, c_gl)
 
  297         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
 
  298              this%duxb_d, this%dvxb_d, this%dwxb_d, n_gl)
 
  299         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
 
  300         call device_sub2(fx%x_d, this%temp_d, n)
 
  302         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
 
  303              this%duyb_d, this%dvyb_d, this%dwyb_d, n_gl)
 
  304         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
 
  305         call device_sub2(fy%x_d, this%temp_d, n)
 
  307         call device_vdot3(this%vr_d, this%tx_d, this%ty_d, this%tz_d, &
 
  308              this%duzb_d, this%dvzb_d, this%dwzb_d, n_gl)
 
  309         call this%GLL_to_GL%map(this%temp, this%vr, nel, this%Xh_GLL)
 
  310         call device_sub2(fz%x_d, this%temp_d, n)
 
  316         call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
 
  317         call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
 
  318         call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
 
  322         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
 
  323         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
 
  324         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
 
  327         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
 
  328         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
 
  329         call device_sub2(fx%x_d, this%temp_d, n)
 
  334         call device_col3(this%duxb_d, this%ty_d, this%txb_d, n_gl)
 
  335         call device_col3(this%duyb_d, this%ty_d, this%tyb_d, n_gl)
 
  336         call device_col3(this%duzb_d, this%ty_d, this%tzb_d, n_gl)
 
  340         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
 
  341         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
 
  342         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
 
  345         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
 
  346         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
 
  347         call device_sub2(fy%x_d, this%temp_d, n)
 
  351         call device_col3(this%duxb_d, this%tz_d, this%txb_d, n_gl)
 
  352         call device_col3(this%duyb_d, this%tz_d, this%tyb_d, n_gl)
 
  353         call device_col3(this%duzb_d, this%tz_d, this%tzb_d, n_gl)
 
  357         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
 
  358         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
 
  359         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
 
  362         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
 
  363         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
 
  364         call device_sub2(fz%x_d, this%temp_d, n)
 
  365      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) 
then 
  371         do e = 1, coef%msh%nelv
 
  373            call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
 
  374            call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
 
  375            call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
 
  378            call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
 
  379            call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
 
  380            call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
 
  385            call opgrad(duxb, duyb, duzb, txb, c_gl, e, e)
 
  386            call opgrad(dvxb, dvyb, dvzb, tyb, c_gl, e, e)
 
  387            call opgrad(dwxb, dwyb, dwzb, tzb, c_gl, e, e)
 
  390            do i = 1, this%Xh_GL%lxyz
 
  391               tfx(i) = tx(i)*duxb(i) + ty(i)*dvxb(i) + tz(i)*dwxb(i)
 
  392               tfy(i) = tx(i)*duyb(i) + ty(i)*dvyb(i) + tz(i)*dwyb(i)
 
  393               tfz(i) = tx(i)*duzb(i) + ty(i)*dvzb(i) + tz(i)*dwzb(i)
 
  397            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  398            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
 
  399            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
 
  402            idx = (e-1)*this%Xh_GLL%lxyz+1
 
  403            call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  404            call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
 
  405            call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
 
  408            do i = 1, this%Xh_GL%lxyz
 
  409               duxb(i) = tx(i)*txb(i)
 
  410               duyb(i) = tx(i)*tyb(i)
 
  411               duzb(i) = tx(i)*tzb(i)
 
  415            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
 
  416            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
 
  417            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
 
  420            do i = 1, this%Xh_GL%lxyz
 
  421               tfx(i) = tfx(i) + tfy(i) + tfz(i)
 
  425            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  426            call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  429            do i = 1, this%Xh_GL%lxyz
 
  430               duxb(i) = ty(i)*txb(i)
 
  431               duyb(i) = ty(i)*tyb(i)
 
  432               duzb(i) = ty(i)*tzb(i)
 
  436            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
 
  437            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
 
  438            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
 
  441            do i = 1, this%Xh_GL%lxyz
 
  442               tfx(i) = tfx(i) + tfy(i) + tfz(i)
 
  446            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  447            call sub2(fy%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  450            do i = 1, this%Xh_GL%lxyz
 
  451               duxb(i) = tz(i)*txb(i)
 
  452               duyb(i) = tz(i)*tyb(i)
 
  453               duzb(i) = tz(i)*tzb(i)
 
  456            call cdtp(tfx, duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
 
  457            call cdtp(tfy, duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
 
  458            call cdtp(tfz, duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
 
  461            do i = 1, this%Xh_GL%lxyz
 
  462               tfx(i) = tfx(i) + tfy(i) + tfz(i)
 
  466            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  467            call sub2(fz%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  475  end subroutine compute_adjoint_advection_dealias
 
  493  subroutine compute_linear_advection_dealias(this, vx, vy, vz, vxb, vyb, vzb, &
 
  494       fx, fy, fz, Xh, coef, n)
 
  497    type(space_t), 
intent(inout) :: Xh
 
  498    type(coef_t), 
intent(inout) :: coef
 
  499    type(field_t), 
intent(inout) :: vx, vy, vz
 
  500    type(field_t), 
intent(inout) :: vxb, vyb, vzb
 
  501    integer, 
intent(in) :: n
 
  502    type(field_t), 
intent(inout) :: fx, fy, fz
 
  503    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: tx, ty, tz
 
  504    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: txb, tyb, tzb
 
  505    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
 
  506    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: vr, vs, vt
 
  507    real(kind=rp), 
dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
 
  509    integer :: e, i, idx, nel, n_GL
 
  511    n_gl = nel * this%Xh_GL%lxyz
 
  514    associate(c_gl => this%coef_GL)
 
  516      if (neko_bcknd_device .eq. 1) 
then 
  518         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
 
  519         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
 
  520         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
 
  523         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
 
  524         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
 
  525         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
 
  528         call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
 
  529         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  530              this%tx_d, this%ty_d, this%tz_d, n_gl)
 
  531         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  532         call device_sub2(fx%x_d, this%temp_d, n)
 
  535         call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
 
  536         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  537              this%tx_d, this%ty_d, this%tz_d, n_gl)
 
  538         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  539         call device_sub2(fy%x_d, this%temp_d, n)
 
  541         call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
 
  542         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  543              this%tx_d, this%ty_d, this%tz_d, n_gl)
 
  544         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  545         call device_sub2(fz%x_d, this%temp_d, n)
 
  548         call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
 
  549         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  550              this%txb_d, this%tyb_d, this%tzb_d, n_gl)
 
  551         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  552         call device_sub2(fx%x_d, this%temp_d, n)
 
  555         call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
 
  556         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  557              this%txb_d, this%tyb_d, this%tzb_d, n_gl)
 
  558         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  559         call device_sub2(fy%x_d, this%temp_d, n)
 
  561         call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
 
  562         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
 
  563              this%txb_d, this%tyb_d, this%tzb_d, n_gl)
 
  564         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  565         call device_sub2(fz%x_d, this%temp_d, n)
 
  567      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) 
then 
  569         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
 
  570         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
 
  571         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
 
  574         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
 
  575         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
 
  576         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
 
  579         call opgrad(this%vr, this%vs, this%vt, this%txb, c_gl)
 
  580         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  581              this%tx, this%ty, this%tz, n_gl)
 
  582         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  583         call sub2(fx%x, this%temp, n)
 
  586         call opgrad(this%vr, this%vs, this%vt, this%tyb, c_gl)
 
  587         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  588              this%tx, this%ty, this%tz, n_gl)
 
  589         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  590         call sub2(fy%x, this%temp, n)
 
  592         call opgrad(this%vr, this%vs, this%vt, this%tzb, c_gl)
 
  593         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  594              this%tx, this%ty, this%tz, n_gl)
 
  595         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  596         call sub2(fz%x, this%temp, n)
 
  599         call opgrad(this%vr, this%vs, this%vt, this%tx, c_gl)
 
  600         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  601              this%txb, this%tyb, this%tzb, n_gl)
 
  602         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  603         call sub2(fx%x, this%temp, n)
 
  606         call opgrad(this%vr, this%vs, this%vt, this%ty, c_gl)
 
  607         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  608              this%txb, this%tyb, this%tzb, n_gl)
 
  609         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  610         call sub2(fy%x, this%temp, n)
 
  612         call opgrad(this%vr, this%vs, this%vt, this%tz, c_gl)
 
  613         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
 
  614              this%txb, this%tyb, this%tzb, n_gl)
 
  615         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
  616         call sub2(fz%x, this%temp, n)
 
  619         do e = 1, coef%msh%nelv
 
  621            call this%GLL_to_GL%map(txb, vxb%x(1,1,1,e), 1, this%Xh_GL)
 
  622            call this%GLL_to_GL%map(tyb, vyb%x(1,1,1,e), 1, this%Xh_GL)
 
  623            call this%GLL_to_GL%map(tzb, vzb%x(1,1,1,e), 1, this%Xh_GL)
 
  625            call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
 
  626            call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
 
  627            call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
 
  630            call opgrad(vr, vs, vt, txb, c_gl, e, e)
 
  631            do i = 1, this%Xh_GL%lxyz
 
  632               tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
 
  635            call opgrad(vr, vs, vt, tyb, c_gl, e, e)
 
  636            do i = 1, this%Xh_GL%lxyz
 
  637               tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
 
  640            call opgrad(vr, vs, vt, tzb, c_gl, e, e)
 
  641            do i = 1, this%Xh_GL%lxyz
 
  642               tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
 
  645            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  646            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
 
  647            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
 
  649            idx = (e-1)*this%Xh_GLL%lxyz+1
 
  650            call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  651            call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
 
  652            call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
 
  655            call opgrad(vr, vs, vt, tx, c_gl, e, e)
 
  656            do i = 1, this%Xh_GL%lxyz
 
  657               tfx(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
 
  660            call opgrad(vr, vs, vt, ty, c_gl, e, e)
 
  661            do i = 1, this%Xh_GL%lxyz
 
  662               tfy(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
 
  665            call opgrad(vr, vs, vt, tz, c_gl, e, e)
 
  666            do i = 1, this%Xh_GL%lxyz
 
  667               tfz(i) = txb(i)*vr(i) + tyb(i)*vs(i) + tzb(i)*vt(i)
 
  670            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
 
  671            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
 
  672            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
 
  674            idx = (e-1)*this%Xh_GLL%lxyz+1
 
  675            call sub2(fx%x(idx, 1, 1, 1), tempx, this%Xh_GLL%lxyz)
 
  676            call sub2(fy%x(idx, 1, 1, 1), tempy, this%Xh_GLL%lxyz)
 
  677            call sub2(fz%x(idx, 1, 1, 1), tempz, this%Xh_GLL%lxyz)
 
  681  end subroutine compute_linear_advection_dealias
 
  697  subroutine compute_adjoint_scalar_advection_dealias(this, vxb, vyb, vzb, s, &
 
  700    type(field_t), 
intent(inout) :: vxb, vyb, vzb
 
  701    type(field_t), 
intent(inout) :: s
 
  702    type(field_t), 
intent(inout) :: fs
 
  703    type(space_t), 
intent(inout) :: Xh
 
  704    type(coef_t), 
intent(inout) :: coef
 
  705    integer, 
intent(in) :: n
 
  706    real(kind=rp), 
intent(in), 
optional :: dt
 
  708    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: vx_gl, vy_gl, vz_gl, s_gl
 
  709    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: work1, work2, work3
 
  710    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: w1, w2, w3
 
  711    real(kind=rp), 
dimension(this%Xh_GL%lxyz) :: f_gl
 
  712    integer :: e, i, idx, nel, n_GL
 
  713    real(kind=rp), 
dimension(this%Xh_GLL%lxyz) :: temp
 
  716    n_gl = nel * this%Xh_GL%lxyz
 
  718    associate(c_gl => this%coef_GL)
 
  719      if (neko_bcknd_device .eq. 1) 
then 
  721         call this%GLL_to_GL%map(this%txb, vxb%x, nel, this%Xh_GL)
 
  722         call this%GLL_to_GL%map(this%tyb, vyb%x, nel, this%Xh_GL)
 
  723         call this%GLL_to_GL%map(this%tzb, vzb%x, nel, this%Xh_GL)
 
  726         call this%GLL_to_GL%map(this%tx, s%x, nel, this%Xh_GL)
 
  729         call device_col3(this%duxb_d, this%tx_d, this%txb_d, n_gl)
 
  730         call device_col3(this%duyb_d, this%tx_d, this%tyb_d, n_gl)
 
  731         call device_col3(this%duzb_d, this%tx_d, this%tzb_d, n_gl)
 
  735         call cdtp(this%vr, this%duxb, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl)
 
  736         call cdtp(this%vs, this%duyb, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl)
 
  737         call cdtp(this%vt, this%duzb, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl)
 
  740         call device_add4(this%duxb_d, this%vr_d, this%vs_d, this%vt_d, n_gl)
 
  743         call this%GLL_to_GL%map(this%temp, this%duxb, nel, this%Xh_GLL)
 
  746         call device_sub2(fs%x_d, this%temp_d, n)
 
  749      else if ((neko_bcknd_sx .eq. 1) .or. (neko_bcknd_xsmm .eq. 1)) 
then 
  750         call neko_error(
"Adjoint scalar not implemented for SX")
 
  752         do e = 1, coef%msh%nelv
 
  754            call this%GLL_to_GL%map(vx_gl, vxb%x(1,1,1,e), 1, this%Xh_GL)
 
  755            call this%GLL_to_GL%map(vy_gl, vyb%x(1,1,1,e), 1, this%Xh_GL)
 
  756            call this%GLL_to_GL%map(vz_gl, vzb%x(1,1,1,e), 1, this%Xh_GL)
 
  759            call this%GLL_to_GL%map(s_gl, s%x(1,1,1,e), 1, this%Xh_GL)
 
  761            do i = 1, this%Xh_GL%lxyz
 
  762               work1(i) = s_gl(i)*vx_gl(i)
 
  763               work2(i) = s_gl(i)*vy_gl(i)
 
  764               work3(i) = s_gl(i)*vz_gl(i)
 
  768            call cdtp(w1, work1, c_gl%drdx, c_gl%dsdx, c_gl%dtdx, c_gl, e, e)
 
  769            call cdtp(w2, work2, c_gl%drdy, c_gl%dsdy, c_gl%dtdy, c_gl, e, e)
 
  770            call cdtp(w3, work3, c_gl%drdz, c_gl%dsdz, c_gl%dtdz, c_gl, e, e)
 
  773            do i = 1, this%Xh_GL%lxyz
 
  774               f_gl(i) = w1(i) + w2(i) + w3(i)
 
  778            idx = (e-1)*this%Xh_GLL%lxyz+1
 
  779            call this%GLL_to_GL%map(temp, f_gl, 1, this%Xh_GLL)
 
  780            call sub2(fs%x(idx, 1, 1, 1), temp, this%Xh_GLL%lxyz)
 
  787  end subroutine compute_adjoint_scalar_advection_dealias
 
Subroutines to add advection terms to the RHS of a transport equation.
 
Subroutines to add advection terms to the RHS of a transport equation.
 
Type encapsulating advection routines with dealiasing.
 
Base abstract type for computing the advection operator.