71 use num_types,
only: rp, dp, sp
72 use json_module,
only: json_file
73 use json_utils,
only: json_get_or_default
74 use vector,
only: vector_t
75 use matrix,
only: matrix_t
76 use comm,
only: pe_rank, neko_comm, pe_size, mpi_real_precision
77 use utils,
only: neko_error, filename_suffix
78 use neko_config,
only: neko_bcknd_device, neko_bcknd_cuda, neko_bcknd_hip, &
80 use device,
only: device_memcpy, host_to_device, device_to_host
81 use,
intrinsic :: iso_c_binding, only: c_ptr
82 use logger,
only: neko_log
83 use mpi_f08,
only: mpi_sum, mpi_allreduce, mpi_integer
84 use scratch_registry,
only: scratch_registry_t
92 integer :: n, m, n_global, max_iter
93 real(kind=rp) :: a0, asyinit, asyincr, asydecr, epsimin, &
95 type(vector_t) :: xold1, xold2, low, upp, alpha, beta, a, c, d, xmax, xmin
96 logical :: is_initialized = .false.
97 logical :: is_updated = .false.
98 type(scratch_registry_t) :: scratch
99 character(len=:),
allocatable :: subsolver, bcknd
102 type(vector_t) :: p0j, q0j
103 type(matrix_t) :: pij, qij
107 real(kind=rp) :: z, zeta
108 type(vector_t) :: y, lambda, s, mu
109 type(vector_t) :: xsi, eta
112 generic,
public :: init => init_from_json, init_from_components
114 procedure,
public, pass(this) :: init_from_components => &
115 mma_init_from_components
116 procedure,
public, pass(this) :: free => mma_free
117 procedure,
public, pass(this) :: get_n => mma_get_n
118 procedure,
public, pass(this) :: get_m => mma_get_m
119 procedure,
public, pass(this) :: get_residumax => mma_get_residumax
120 procedure,
public, pass(this) :: get_residunorm => mma_get_residunorm
121 procedure,
public, pass(this) :: get_max_iter => mma_get_max_iter
122 procedure,
public, pass(this) :: get_backend_and_subsolver => &
123 mma_get_backend_and_subsolver
125 generic,
public :: update => update_vector, update_cpu, update_device
126 procedure, pass(this) :: update_vector => mma_update_vector
127 procedure, pass(this) :: update_cpu => mma_update_cpu
128 procedure, pass(this) :: update_device => mma_update_device
130 generic,
public :: kkt => kkt_vector, kkt_cpu, kkt_device
131 procedure, pass(this) :: kkt_vector => mma_kkt_vector
132 procedure, pass(this) :: kkt_cpu => mma_kkt_cpu
133 procedure, pass(this) :: kkt_device => mma_kkt_device
135 procedure, pass(this) :: save_checkpoint => mma_save_checkpoint
136 procedure, pass(this) :: load_checkpoint => mma_load_checkpoint
139 procedure, pass(this) :: copy_from => mma_copy_from
148 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
149 class(mma_t),
intent(inout) :: this
150 integer,
intent(in) :: iter
151 real(kind=rp),
dimension(this%n),
intent(inout) :: x
152 real(kind=rp),
dimension(this%n),
intent(in) :: df0dx
153 real(kind=rp),
dimension(this%m),
intent(in) :: fval
154 real(kind=rp),
dimension(this%m, this%n),
intent(in) :: dfdx
155 end subroutine mma_update_cpu
158 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
159 class(mma_t),
intent(inout) :: this
160 real(kind=rp),
dimension(this%n),
intent(in) :: x
161 real(kind=rp),
dimension(this%n),
intent(in) :: df0dx
162 real(kind=rp),
dimension(this%m),
intent(in) :: fval
163 real(kind=rp),
dimension(this%m, this%n),
intent(in) :: dfdx
164 end subroutine mma_kkt_cpu
170 module subroutine mma_update_device(this, iter, x, df0dx, fval, dfdx)
171 class(mma_t),
intent(inout) :: this
172 integer,
intent(in) :: iter
173 type(c_ptr),
intent(inout) :: x
174 type(c_ptr),
intent(in) :: df0dx, fval, dfdx
175 end subroutine mma_update_device
178 module subroutine mma_kkt_device(this, x, df0dx, fval, dfdx)
179 class(mma_t),
intent(inout) :: this
180 type(c_ptr),
intent(in) :: x, df0dx, fval, dfdx
181 end subroutine mma_kkt_device
189 module subroutine mma_save_checkpoint_hdf5(object, filename, overwrite)
190 class(mma_t),
intent(inout) :: object
191 character(len=*),
intent(in) :: filename
192 logical,
intent(in),
optional :: overwrite
193 end subroutine mma_save_checkpoint_hdf5
195 module subroutine mma_load_checkpoint_hdf5(object, filename)
196 class(mma_t),
intent(inout) :: object
197 character(len=*),
intent(in) :: filename
198 end subroutine mma_load_checkpoint_hdf5
207 subroutine mma_init_from_json(this, x, n, m, json, scale, auto_scale)
219 class(mma_t),
intent(inout) :: this
220 integer,
intent(in) :: n, m
221 type(vector_t),
intent(in) :: x
223 type(json_file),
intent(inout) :: json
226 real(kind=rp),
intent(out) :: scale
227 logical,
intent(out) :: auto_scale
235 real(kind=rp),
dimension(n) :: xmax, xmin
236 real(kind=rp),
dimension(m) :: a, c, d
237 character(len=:),
allocatable :: subsolver, bcknd, bcknd_default
240 real(kind=rp) :: a0 , xmax_const, xmin_const, a_const, c_const, d_const
242 integer :: max_iter, n_global, ierr
243 real(kind=rp) :: epsimin, asyinit, asyincr, asydecr
245 call mpi_allreduce(n, n_global, 1, mpi_integer, &
246 mpi_sum, neko_comm, ierr)
249 if (neko_bcknd_device .eq. 1)
then
250 bcknd_default =
"device"
252 bcknd_default =
"cpu"
258 call json_get_or_default(json,
'mma.epsimin', epsimin, &
259 1.0e-9_rp * sqrt(real(m + n_global, rp)))
260 call json_get_or_default(json,
'mma.max_iter', max_iter, 100)
263 call json_get_or_default(json,
'mma.asyinit', asyinit, 0.5_rp)
264 call json_get_or_default(json,
'mma.asyincr', asyincr, 1.2_rp)
265 call json_get_or_default(json,
'mma.asydecr', asydecr, 0.7_rp)
267 call json_get_or_default(json,
'mma.backend', bcknd, bcknd_default)
268 call json_get_or_default(json,
'mma.subsolver', subsolver,
'dip')
270 call json_get_or_default(json,
'mma.xmin', xmin_const, 0.0_rp)
271 call json_get_or_default(json,
'mma.xmax', xmax_const, 1.0_rp)
272 call json_get_or_default(json,
'mma.a0', a0, 1.0_rp)
273 call json_get_or_default(json,
'mma.a', a_const, 0.0_rp)
274 call json_get_or_default(json,
'mma.c', c_const, 100.0_rp)
275 call json_get_or_default(json,
'mma.d', d_const, 0.0_rp)
277 call json_get_or_default(json,
'mma.scale', scale, 10.0_rp)
278 call json_get_or_default(json,
'mma.auto_scale', auto_scale, .false.)
290 call this%init(x, n, m, a0, a, c, d, xmin, xmax, &
291 max_iter, epsimin, asyinit, asyincr, asydecr, bcknd, subsolver)
293 end subroutine mma_init_from_json
296 subroutine mma_free(this)
297 class(mma_t),
intent(inout) :: this
299 call this%xold1%free()
300 call this%xold2%free()
301 call this%alpha%free()
302 call this%beta%free()
308 call this%xmax%free()
309 call this%xmin%free()
314 call this%lambda%free()
323 call this%scratch%free()
325 this%is_initialized = .false.
326 this%is_updated = .false.
327 end subroutine mma_free
330 subroutine mma_init_from_components(this, x, n, m, a0, a, c, d, xmin, xmax, &
331 max_iter, epsimin, asyinit, asyincr, asydecr, bcknd, subsolver)
343 class(mma_t),
intent(inout) :: this
344 integer,
intent(in) :: n, m
345 type(vector_t),
intent(in) :: x
353 real(kind=rp),
intent(in),
dimension(n) :: xmax, xmin
354 real(kind=rp),
intent(in),
dimension(m) :: a, c, d
355 real(kind=rp),
intent(in) :: a0
356 integer,
intent(in),
optional :: max_iter
357 real(kind=rp),
intent(in),
optional :: epsimin, asyinit, asyincr, asydecr
358 character(len=*),
intent(in),
optional :: bcknd, subsolver
359 character(len=256) :: log_msg
363 call this%scratch%init()
368 call this%xold1%init(n)
369 call this%xold2%init(n)
373 call this%alpha%init(n)
374 call this%beta%init(n)
379 call this%low%init(n)
380 call this%upp%init(n)
381 call this%xmax%init(n)
382 call this%xmin%init(n)
385 call this%p0j%init(n)
386 call this%q0j%init(n)
387 call this%pij%init(m, n)
388 call this%qij%init(m, n)
393 call this%lambda%init(m)
396 call this%xsi%init(n)
397 call this%eta%init(n)
408 if (neko_bcknd_device .eq. 1)
then
409 call this%a%copy_from(host_to_device, sync = .false.)
410 call this%c%copy_from(host_to_device, sync = .false.)
411 call this%d%copy_from(host_to_device, sync = .false.)
412 call this%xmax%copy_from(host_to_device, sync = .false.)
413 call this%xmin%copy_from(host_to_device, sync = .true.)
417 this%residumax = huge(0.0_rp)
418 this%residunorm = huge(0.0_rp)
421 call mpi_allreduce(n, this%n_global, 1, mpi_integer, mpi_sum, neko_comm, &
428 if (.not.
present(max_iter)) this%max_iter = 100
429 if (.not.
present(epsimin))
then
430 this%epsimin = 1.0e-9_rp * sqrt(real(this%m + this%n_global, rp))
434 if (.not.
present(asyinit)) this%asyinit = 0.5_rp
435 if (.not.
present(asyincr)) this%asyincr = 1.2_rp
436 if (.not.
present(asydecr)) this%asydecr = 0.7_rp
439 if (.not.
present(bcknd) .and. neko_bcknd_device .eq. 0)
then
441 else if (.not.
present(bcknd))
then
442 this%bcknd =
"device"
446 if (.not.
present(subsolver)) this%subsolver =
"dip"
449 if (
present(max_iter)) this%max_iter = max_iter
450 if (
present(epsimin)) this%epsimin = epsimin
451 if (
present(asyinit)) this%asyinit = asyinit
452 if (
present(asyincr)) this%asyincr = asyincr
453 if (
present(asydecr)) this%asydecr = asydecr
454 if (
present(bcknd)) this%bcknd = bcknd
455 if (
present(subsolver)) this%subsolver = subsolver
457 call neko_log%section(
'MMA Parameters')
459 write(log_msg,
'(A10,1X,A)')
'backend ', trim(this%bcknd)
460 call neko_log%message(log_msg)
461 write(log_msg,
'(A10,1X,A)')
'subsolver ', trim(this%subsolver)
462 call neko_log%message(log_msg)
464 write(log_msg,
'(A10,1X,I0)')
'n ', this%n_global
465 call neko_log%message(log_msg)
466 write(log_msg,
'(A10,1X,I0)')
'm ', this%m
467 call neko_log%message(log_msg)
468 write(log_msg,
'(A10,1X,I0)')
'max_iter ', this%max_iter
469 call neko_log%message(log_msg)
471 write(log_msg,
'(A10,1X,E11.5)')
'epsimin ', this%epsimin
472 call neko_log%message(log_msg)
474 write(log_msg,
'(A10,1X,E11.5)')
'asyinit ', this%asyinit
475 call neko_log%message(log_msg)
476 write(log_msg,
'(A10,1X,E11.5)')
'asyincr ', this%asyincr
477 call neko_log%message(log_msg)
478 write(log_msg,
'(A10,1X,E11.5)')
'asydecr ', this%asydecr
479 call neko_log%message(log_msg)
480 write(log_msg,
'(A10,1X,E11.5)')
'a0 ', this%a0
481 call neko_log%message(log_msg)
483 call neko_log%message(
'Parameters a')
485 write(log_msg,
'(3X,A,I2,A,E11.5)')
'a(', i,
') = ', this%a%x(i)
486 call neko_log%message(log_msg)
488 call neko_log%message(
'Parameters c')
490 write(log_msg,
'(3X,A,I2,A,E11.5)')
'c(', i,
') = ', this%c%x(i)
491 call neko_log%message(log_msg)
493 call neko_log%message(
'Parameters d')
495 write(log_msg,
'(3X,A,I2,A,E11.5)')
'd(', i,
') = ', this%d%x(i)
496 call neko_log%message(log_msg)
499 call neko_log%end_section()
502 this%is_initialized = .true.
503 end subroutine mma_init_from_components
509 subroutine mma_update_vector(this, iter, x, df0dx, fval, dfdx)
510 class(mma_t),
intent(inout) :: this
511 integer,
intent(in) :: iter
512 type(vector_t),
intent(inout) :: x
513 type(vector_t),
intent(inout) :: df0dx, fval
514 type(matrix_t),
intent(inout) :: dfdx
517 select case (this%bcknd)
519 if (neko_bcknd_device .eq. 1)
then
520 call x%copy_from(device_to_host, sync = .false.)
521 call df0dx%copy_from(device_to_host, sync = .false.)
522 call fval%copy_from(device_to_host, sync = .false.)
523 call dfdx%copy_from(device_to_host, sync = .true.)
526 call mma_update_cpu(this, iter, x%x, df0dx%x, fval%x, dfdx%x)
528 if (neko_bcknd_device .eq. 1)
then
529 call x%copy_from(host_to_device, sync = .true.)
533 call mma_update_device(this, iter, x%x_d, df0dx%x_d, fval%x_d, dfdx%x_d)
536 end subroutine mma_update_vector
539 subroutine mma_kkt_vector(this, x, df0dx, fval, dfdx)
540 class(mma_t),
intent(inout) :: this
541 type(vector_t),
intent(inout) :: x, df0dx, fval
542 type(matrix_t),
intent(inout) :: dfdx
545 select case (this%bcknd )
547 if (neko_bcknd_device .eq. 1)
then
548 call device_memcpy(x%x, x%x_d, this%n, device_to_host, &
550 call device_memcpy(df0dx%x, df0dx%x_d, this%n, device_to_host, &
552 call device_memcpy(fval%x, fval%x_d, this%m, device_to_host, &
554 call device_memcpy(dfdx%x, dfdx%x_d, this%m * this%n, device_to_host,&
558 call mma_kkt_cpu(this, x%x, df0dx%x, fval%x, dfdx%x)
560 call mma_kkt_device(this, x%x_d, df0dx%x_d, fval%x_d, dfdx%x_d)
562 end subroutine mma_kkt_vector
571 subroutine mma_save_checkpoint(this, filename, overwrite)
572 class(mma_t),
intent(inout) :: this
573 character(len=*),
intent(in) :: filename
574 logical,
intent(in),
optional :: overwrite
575 character(len=12) :: file_ext
578 call filename_suffix(filename, file_ext)
580 select case (trim(file_ext))
581 case (
'h5',
'hdf5',
'hf5')
582 call mma_save_checkpoint_hdf5(this, filename, overwrite)
584 call neko_error(
'mma_save_checkpoint: Unsupported file format: ' // &
588 end subroutine mma_save_checkpoint
593 subroutine mma_load_checkpoint(this, filename)
594 class(mma_t),
intent(inout) :: this
595 character(len=*),
intent(in) :: filename
596 character(len=12) :: file_ext
599 call filename_suffix(filename, file_ext)
601 select case (trim(file_ext))
602 case (
'h5',
'hdf5',
'hf5')
603 call mma_load_checkpoint_hdf5(this, filename)
605 call neko_error(
'mma_load_checkpoint: Unsupported file format: ' // &
608 end subroutine mma_load_checkpoint
614 pure function mma_get_n(this)
result(n)
615 class(mma_t),
intent(in) :: this
618 end function mma_get_n
621 pure function mma_get_m(this)
result(m)
622 class(mma_t),
intent(in) :: this
625 end function mma_get_m
628 pure function mma_get_residumax(this)
result(residumax)
629 class(mma_t),
intent(in) :: this
630 real(kind=rp) :: residumax
631 residumax = this%residumax
632 end function mma_get_residumax
635 pure function mma_get_residunorm(this)
result(residunorm)
636 class(mma_t),
intent(in) :: this
637 real(kind=rp) :: residunorm
638 residunorm = this%residunorm
639 end function mma_get_residunorm
642 pure function mma_get_max_iter(this)
result(max_iter_value)
643 class(mma_t),
intent(in) :: this
644 integer :: max_iter_value
645 max_iter_value = this%max_iter
646 end function mma_get_max_iter
649 pure function mma_get_backend_and_subsolver(this)
result(backend_subsolver)
650 class(mma_t),
intent(in) :: this
651 character(len=:),
allocatable :: backend_subsolver
652 character(len=:),
allocatable :: backend
654 if (neko_bcknd_cuda .eq. 1)
then
656 else if (neko_bcknd_hip .eq. 1)
then
658 else if (neko_bcknd_opencl .eq. 1)
then
664 backend_subsolver =
'backend:' // trim(backend) //
', subsolver:' // &
666 end function mma_get_backend_and_subsolver
672 subroutine mma_copy_from(this, direction, sync)
673 class(mma_t),
intent(inout) :: this
674 integer,
intent(in) :: direction
675 logical,
intent(in) :: sync
677 call this%xold1%copy_from(direction, sync = .false.)
678 call this%xold2%copy_from(direction, sync = .false.)
679 call this%xmax%copy_from(direction, sync = .false.)
680 call this%xmin%copy_from(direction, sync = .false.)
682 call this%low%copy_from(direction, sync = .false.)
683 call this%upp%copy_from(direction, sync = .false.)
685 call this%a%copy_from(direction, sync = .false.)
686 call this%c%copy_from(direction, sync = .false.)
687 call this%d%copy_from(direction, sync = .false.)
688 call this%y%copy_from(direction, sync = .false.)
689 call this%s%copy_from(direction, sync = .false.)
691 call this%p0j%copy_from(direction, sync = .false.)
692 call this%q0j%copy_from(direction, sync = .false.)
693 call this%pij%copy_from(direction, sync = .false.)
694 call this%qij%copy_from(direction, sync = .false.)
695 call this%bi%copy_from(direction, sync = .false.)
697 call this%alpha%copy_from(direction, sync = .false.)
698 call this%beta%copy_from(direction, sync = .false.)
699 call this%lambda%copy_from(direction, sync = .false.)
700 call this%mu%copy_from(direction, sync = .false.)
701 call this%xsi%copy_from(direction, sync = .false.)
702 call this%eta%copy_from(direction, sync = sync)
704 end subroutine mma_copy_from
710 module subroutine mma_save_checkpoint_hdf5(object, filename, overwrite)
711 class(mma_t),
intent(inout) :: object
712 character(len=*),
intent(in) :: filename
713 logical,
intent(in),
optional :: overwrite
714 call neko_error(
'mma: HDF5 support not enabled rebuild with HAVE_HDF5')
715 end subroutine mma_save_checkpoint_hdf5
717 module subroutine mma_load_checkpoint_hdf5(object, filename)
718 class(mma_t),
intent(inout) :: object
719 character(len=*),
intent(in) :: filename
720 call neko_error(
'mma: HDF5 support not enabled rebuild with HAVE_HDF5')
721 end subroutine mma_load_checkpoint_hdf5
subroutine mma_init_from_json(this, x, n, m, json, scale, auto_scale)
Device KKT check for convergence.