35module simulation_checkpoint
36 use num_types,
only: rp, sp, dp
37 use case,
only: case_t
38 use json_file_module,
only: json_file
39 use json_utils,
only: json_get, json_get_or_default
40 use scalar_scheme,
only: scalar_scheme_t
41 use time_state,
only: time_state_t
42 use chkp_output,
only: chkp_output_t
43 use field,
only: field_t
44 use field_list,
only: field_list_t
45 use logger,
only: neko_log, log_size, neko_log_debug
46 use mpi_f08,
only: mpi_wtime
47 use utils,
only: neko_error
48 use math,
only: copy, rzero
49 use profiler,
only: profiler_start_region, profiler_end_region
50 use neko_config,
only: neko_bcknd_device
51 use device,
only: device_memcpy, device_to_host, host_to_device
52 use registry,
only: neko_registry
!> Whether checkpointing is active. `save`, `restore` and `reset` return
!! immediately while this is `.false.`.
logical :: enabled = .false.
!> Name of the checkpointing algorithm; selects the save/restore
!! implementation.
character(len=256) :: algorithm = "linear"
!> File name handed to the checkpoint output object on initialisation.
character(len=256) :: filename = "checkpoint"
!> File format handed to the checkpoint output object on initialisation.
character(len=8) :: fmt = "chkp"
!> Number of checkpoint slots kept in host memory (first extent of
!! `state_storage`).
integer :: n_saves_memory = 10
!> When `.false.`, the checkpoint files are deleted as the object is freed.
logical :: keep_checkpoints = .false.
!> NOTE(review): presumably the number of checkpoints written to disc; the
!! procedures visible here only ever reset it to zero -- confirm.
integer :: n_saves_disc = 0
!> Number of `save` calls made so far; `restore` accepts time steps in
!! the range [1, n_timesteps].
integer :: n_timesteps = 0
!> NOTE(review): not read by any procedure visible here; presumably the
!! first time step for which a checkpoint is valid -- confirm.
integer :: first_valid_timestep = 2
!> NOTE(review): presumably the index of the checkpoint currently loaded,
!! with -1 meaning "none" -- confirm against the algorithm implementation.
integer :: loaded_checkpoint = -1
!> Fields whose values make up one checkpoint (fluid p, u, v, w, the
!! scalars, and any extra fields).
type(field_list_t) :: state_list
!> Host copies of the fields, indexed as (memory slot, field).
type(host_array), dimension(:,:), allocatable :: state_storage
!> Checkpoint output object; its file name is queried when checkpoint
!! files are deleted.
type(chkp_output_t) :: chkp_output
!> Generic initialiser, resolving to the JSON or the component variant.
generic, public :: init => init_from_json, init_from_components
!> Initialise from a JSON parameter object.
procedure, public, pass(this) :: init_from_json => &
  checkpoint_init_from_json
!> Initialise from explicitly passed (optional) settings.
procedure, public, pass(this) :: init_from_components => &
  checkpoint_init_from_components
!> Release storage, optionally delete checkpoint files, restore defaults.
procedure, public, pass(this) :: free => checkpoint_free
!> Zero the in-memory storage and reset counters.
procedure, public, pass(this) :: reset => checkpoint_reset
!> Record a checkpoint of the current state.
procedure, public, pass(this) :: save => checkpoint_save
!> Restore the state of a given time step.
procedure, public, pass(this) :: restore => checkpoint_restore
!> Copy the current field values into a memory slot (not public).
procedure, pass(this) :: save_data => checkpoint_save_data
!> Copy a memory slot back into the fields (not public).
procedure, pass(this) :: load_data => checkpoint_load_data
!> Values of one field for one checkpoint slot, held in host memory.
real(kind=rp), allocatable :: data(:)
! NOTE(review): the procedures of this type also reference a `size`
! component, and a `contains` statement must separate components from
! bindings; neither is visible in this excerpt.
!> Allocate the storage and zero it.
procedure, pass(this) :: init => host_array_init
!> Deallocate the storage.
procedure, pass(this) :: free => host_array_free
!> Query whether the storage is currently allocated.
procedure, pass(this) :: is_allocated => host_array_is_allocated
!> Interface of the save routine that `checkpoint_save` dispatches to.
!! Declared as a separate module procedure; its implementation is not part
!! of this excerpt.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case whose state is saved.
module subroutine checkpoint_save_linear(this, neko_case)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), intent(inout) :: neko_case
end subroutine checkpoint_save_linear
!> Interface of the restore routine that `checkpoint_restore` dispatches
!! to. Declared as a separate module procedure; its implementation is not
!! part of this excerpt.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case whose state is restored.
!! @param[in] tstep The time step to restore.
module subroutine checkpoint_restore_linear(this, neko_case, tstep)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), target, intent(inout) :: neko_case
  integer, intent(in) :: tstep
end subroutine checkpoint_restore_linear
!> Read the checkpoint settings from JSON and forward them to
!! `init_from_components`.
!! @details Nothing is initialised when the `enabled` entry is absent or
!! false. Every other entry falls back to a default when missing. Field
!! names listed under `extra_fields` are looked up in `neko_registry` and
!! added to the checkpoint.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case to checkpoint.
!! @param[inout] params JSON object holding the checkpoint settings.
subroutine checkpoint_init_from_json(this, neko_case, params)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), target, intent(inout) :: neko_case
  type(json_file), target, intent(inout) :: params

  logical :: enabled, keep_checkpoints
  integer :: n_saves_memory, idx
  character(len=:), allocatable :: filename, algorithm, fmt
  character(len=256), allocatable :: extra_field_names(:)
  type(field_list_t) :: extra_fields
  type(field_t), pointer :: extra_field

  ! A disabled (or unspecified) checkpoint leaves the object untouched.
  call json_get_or_default(params, "enabled", enabled, .false.)
  if (.not. enabled) then
    return
  end if

  call json_get_or_default(params, "algorithm", algorithm, "linear")
  call json_get_or_default(params, "n_memory", n_saves_memory, 10)
  call json_get_or_default(params, "filename", filename, "checkpoint")
  call json_get_or_default(params, "format", fmt, "chkp")
  call json_get_or_default(params, "keep_checkpoints", keep_checkpoints, &
       .false.)

  ! Optional user-requested fields, resolved by name through the registry.
  if ("extra_fields" .in. params) then
    allocate(extra_field_names(0))
    call json_get(params, "extra_fields", extra_field_names)
    do idx = 1, size(extra_field_names)
      extra_field => neko_registry%get_field(extra_field_names(idx))
      call extra_fields%append(extra_field)
    end do
  end if

  call this%init_from_components(neko_case, algorithm = algorithm, &
       n_saves_memory = n_saves_memory, filename = filename, fmt = fmt, &
       keep_checkpoints = keep_checkpoints, extra_fields = extra_fields)
end subroutine checkpoint_init_from_json
!> Initialise the checkpoint object from explicit settings.
!! @details Any previous state is released first, so the routine may be
!! called repeatedly. Settings that are not passed keep the defaults of the
!! type. The checkpointed state consists of the fluid fields p, u, v, w,
!! the solution field of every scalar (when scalars are allocated) and the
!! fields in `extra_fields`. Memory slots are allocated lazily on first
!! save; only the slot table is allocated here.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case to checkpoint.
!! @param[in] algorithm Name of the checkpointing algorithm (optional).
!! @param[in] n_saves_memory Number of in-memory slots (optional).
!! @param[in] filename Checkpoint file name (optional).
!! @param[in] fmt Checkpoint file format (optional).
!! @param[in] keep_checkpoints Keep files when the object is freed (optional).
!! @param[inout] extra_fields Additional fields to checkpoint (optional).
subroutine checkpoint_init_from_components(this, neko_case, algorithm, &
     n_saves_memory, filename, fmt, keep_checkpoints, extra_fields)
  class(simulation_checkpoint_t), intent(inout), target :: this
  class(case_t), target, intent(inout) :: neko_case
  character(len=*), optional, intent(in) :: algorithm
  integer, optional, intent(in) :: n_saves_memory
  character(len=*), optional, intent(in) :: filename
  character(len=*), optional, intent(in) :: fmt
  logical, optional, intent(in) :: keep_checkpoints
  type(field_list_t), optional, intent(inout) :: extra_fields
  type(field_t), pointer :: si
  character(len=LOG_SIZE) :: msg
  integer :: i

  ! Start from a clean object: without this a second initialisation would
  ! append the fields twice and fail on the already allocated storage.
  call this%free()

  this%enabled = .true.
  if (present(algorithm)) this%algorithm = algorithm
  if (present(filename)) this%filename = filename
  if (present(n_saves_memory)) this%n_saves_memory = n_saves_memory
  if (present(fmt)) this%fmt = fmt
  if (present(keep_checkpoints)) this%keep_checkpoints = keep_checkpoints

  ! NOTE(review): in the reviewed text this call continued past
  ! `fmt = this%fmt` onto a line that was not visible; confirm whether
  ! further arguments have to be passed here.
  call this%chkp_output%init(neko_case%chkp, this%filename, fmt = this%fmt)

  ! Fluid state.
  call this%state_list%append(neko_case%fluid%p)
  call this%state_list%append(neko_case%fluid%u)
  call this%state_list%append(neko_case%fluid%v)
  call this%state_list%append(neko_case%fluid%w)

  ! Scalar state, if the case carries scalars.
  if (allocated(neko_case%scalars)) then
    do i = 1, size(neko_case%scalars%scalar_fields)
      si => neko_case%scalars%scalar_fields(i)%scalar%s
      call this%state_list%append(si)
    end do
  end if

  ! User-requested additional fields.
  if (present(extra_fields)) then
    do i = 1, extra_fields%size()
      si => extra_fields%get(i)
      call this%state_list%append(si)
    end do
  end if

  ! One entry per (memory slot, field); the data arrays themselves are
  ! allocated when a slot is first written.
  allocate(this%state_storage(this%n_saves_memory, this%state_list%size()))

  call neko_log%section("Checkpointing")

  ! Strings are concatenated rather than written into `msg`: the component
  ! strings are up to 256 characters long and could overflow the
  ! LOG_SIZE-long internal file.
  call neko_log%message("Algorithm: " // trim(this%algorithm))
  write(msg, '(A,I0)') "Number of checkpoints in RAM: ", this%n_saves_memory
  call neko_log%message(trim(msg))
  call neko_log%message("Checkpoint file name: " // trim(this%filename))
  call neko_log%message("Checkpoint file format: " // trim(this%fmt))

  if (.not. this%keep_checkpoints) then
    call neko_log%message("Checkpoint files will be deleted.")
  else
    call neko_log%message("Checkpoint files will be kept.")
  end if

  call neko_log%message("Fields in checkpoint:", neko_log_debug)
  do i = 1, this%state_list%size()
    si => this%state_list%get(i)
    call neko_log%message(" - " // trim(si%name), neko_log_debug)
  end do

  call neko_log%end_section()

end subroutine checkpoint_init_from_components
!> Release all resources and return the object to its default state.
!! @details Frees every in-memory slot and the field list. Unless
!! `keep_checkpoints` is set, the checkpoint file of each recorded time
!! step is deleted if it exists. All settings and counters are then reset
!! to the defaults declared in the type.
!! @param[inout] this The checkpoint object.
subroutine checkpoint_free(this)
  class(simulation_checkpoint_t), intent(inout) :: this

  character(len=1024) :: file_name
  integer :: i, j
  logical :: exists
  integer :: stat, unit

  ! Loop bounds come from the array itself so that they stay valid even if
  ! n_saves_memory or state_list no longer match the allocation.
  if (allocated(this%state_storage)) then
    do j = 1, size(this%state_storage, 2)
      do i = 1, size(this%state_storage, 1)
        call this%state_storage(i, j)%free()
      end do
    end do
    deallocate(this%state_storage)
  end if

  call this%state_list%free()

  ! Remove the checkpoint files, newest first.
  if (.not. this%keep_checkpoints) then
    do i = this%n_timesteps, 1, -1
      call this%chkp_output%set_counter(i)
      file_name = this%chkp_output%file_%get_fname()
      inquire(file = trim(file_name), exist = exists)
      if (exists) then
        ! Opening and closing with status 'delete' removes the file.
        open(newunit = unit, file = trim(file_name), iostat = stat, &
             status = 'old')
        if (stat .eq. 0) close(unit, status = 'delete')
      end if
    end do
  end if

  ! Back to the defaults of the type, including fmt and n_timesteps so
  ! that a later re-initialisation does not inherit stale values.
  this%enabled = .false.
  this%filename = "checkpoint"
  this%fmt = "chkp"
  this%algorithm = "linear"
  this%n_saves_memory = 10
  this%keep_checkpoints = .false.

  this%n_saves_disc = 0
  this%n_timesteps = 0
  this%first_valid_timestep = 2
  this%loaded_checkpoint = -1

end subroutine checkpoint_free
!> Record a checkpoint of the current simulation state.
!! @details Does nothing when checkpointing is disabled. Otherwise the
!! time-step counter is advanced and the work is delegated to the routine
!! of the selected algorithm; an unknown algorithm name is a fatal error.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case whose state is saved.
subroutine checkpoint_save(this, neko_case)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), intent(inout) :: neko_case

  if (.not. this%enabled) return

  call profiler_start_region("Checkpoint save")

  ! Count this save before dispatching.
  this%n_timesteps = this%n_timesteps + 1

  select case (trim(this%algorithm))
  case ("linear")
    call checkpoint_save_linear(this, neko_case)
  case default
    ! Trimmed so the message is not padded to the full 256 characters.
    call neko_error("Unknown checkpoint algorithm: " // &
         trim(this%algorithm))
  end select

  call profiler_end_region("Checkpoint save")
end subroutine checkpoint_save
!> Restore the simulation state of a previously saved time step.
!! @details Does nothing when checkpointing is disabled. The requested
!! time step must lie in [1, n_timesteps]; anything else, as well as an
!! unknown algorithm name, is a fatal error.
!! @param[inout] this The checkpoint object.
!! @param[inout] neko_case The simulation case to restore into.
!! @param[in] tstep The time step to restore.
subroutine checkpoint_restore(this, neko_case, tstep)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), target, intent(inout) :: neko_case
  integer, intent(in) :: tstep
  character(len=256) :: msg

  if (.not. this%enabled) return

  call profiler_start_region("Checkpoint restore")

  if (tstep .lt. 1 .or. tstep .gt. this%n_timesteps) then
    write(msg, '(A,I0,A,I0,A)') "Requested timestep ", tstep, &
         " is out of range [1, ", this%n_timesteps, "]"
    call neko_error(trim(msg))
  end if

  select case (trim(this%algorithm))
  case ("linear")
    call checkpoint_restore_linear(this, neko_case, tstep)
  case default
    ! Trimmed so the message is not padded to the full 256 characters.
    call neko_error("Unknown checkpoint algorithm: " // &
         trim(this%algorithm))
  end select

  call profiler_end_region("Checkpoint restore")
end subroutine checkpoint_restore
!> Copy the current values of all checkpointed fields into a memory slot.
!! @details Storage for the slot is allocated on first use. With the host
!! backend the field values are copied directly; otherwise they are
!! transferred from the device, and only the transfer of the last field is
!! issued with the synchronisation flag set.
!! @param[inout] this The checkpoint object.
!! @param[in] index Memory slot to write, in [1, n_saves_memory].
subroutine checkpoint_save_data(this, index)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer, intent(in) :: index
  type(field_t), pointer :: fld
  integer :: k, n_fields
  logical :: is_last
  character(len=1024) :: msg

  if (index .lt. 1 .or. index .gt. this%n_saves_memory) then
    write(msg, '(A,I0,A,I0,A)') "Checkpoint save index ", index, &
         " is out of range [1, ", this%n_saves_memory, "]"
    call neko_error(trim(msg))
  end if

  n_fields = this%state_list%size()

  ! Lazily create the host buffers of this slot.
  do k = 1, n_fields
    if (this%state_storage(index, k)%is_allocated()) cycle
    fld => this%state_list%get(k)
    call this%state_storage(index, k)%init(fld%size())
  end do

  if (neko_bcknd_device .ne. 0) then
    ! Device backend: pull the data to the host.
    do k = 1, n_fields
      fld => this%state_list%get(k)
      is_last = n_fields .eq. k
      call device_memcpy(this%state_storage(index, k)%data, fld%x_d, &
           fld%size(), device_to_host, is_last)
    end do
  else
    ! Host backend: plain copy.
    do k = 1, n_fields
      fld => this%state_list%get(k)
      call copy(this%state_storage(index, k)%data, fld%x, fld%size())
    end do
  end if
end subroutine checkpoint_save_data
!> Copy the contents of a memory slot back into the checkpointed fields.
!! @details The slot index must lie in [1, n_saves_memory] and the slot
!! must have been written before, since slots are only allocated when they
!! are first saved; both conditions are checked and violations are fatal.
!! With the host backend the values are copied directly; otherwise they are
!! transferred to the device, and only the transfer of the last field is
!! issued with the synchronisation flag set.
!! @param[inout] this The checkpoint object.
!! @param[in] index Memory slot to read, in [1, n_saves_memory].
subroutine checkpoint_load_data(this, index)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer, intent(in) :: index
  type(field_t), pointer :: si
  character(len=1024) :: msg
  integer :: i, n_fields

  if (index .lt. 1 .or. index .gt. this%n_saves_memory) then
    write(msg, '(A,I0,A,I0,A)') "Checkpoint load index ", index, &
         " is out of range [1, ", this%n_saves_memory, "]"
    call neko_error(trim(msg))
  end if

  n_fields = this%state_list%size()

  ! Refuse to read from a slot that was never filled: its data arrays are
  ! unallocated and must not be passed on.
  do i = 1, n_fields
    if (.not. this%state_storage(index, i)%is_allocated()) then
      write(msg, '(A,I0,A)') "Checkpoint slot ", index, &
           " has not been saved, nothing to load"
      call neko_error(trim(msg))
    end if
  end do

  if (neko_bcknd_device .eq. 0) then
    do i = 1, n_fields
      si => this%state_list%get(i)
      call copy(si%x, this%state_storage(index, i)%data, si%size())
    end do
  else
    do i = 1, n_fields
      si => this%state_list%get(i)
      call device_memcpy(this%state_storage(index, i)%data, si%x_d, &
           si%size(), host_to_device, n_fields .eq. i)
    end do
  end if

end subroutine checkpoint_load_data
!> Discard all recorded checkpoints while keeping the configuration.
!! @details Does nothing when checkpointing is disabled. The counters are
!! reset and every memory slot that has been allocated is filled with
!! zeros; slots that were never written are left unallocated.
!! @param[inout] this The checkpoint object.
subroutine checkpoint_reset(this)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer :: i, j

  if (.not. this%enabled) return

  this%loaded_checkpoint = -1
  this%n_saves_disc = 0
  ! The recorded time steps are invalidated together with their data;
  ! otherwise `restore` would still accept them and hand back zeros.
  this%n_timesteps = 0

  if (.not. allocated(this%state_storage)) return

  do j = 1, size(this%state_storage, 2)
    do i = 1, size(this%state_storage, 1)
      ! Slots are allocated on first save, so some may hold no data yet.
      if (this%state_storage(i, j)%is_allocated()) then
        call rzero(this%state_storage(i, j)%data, &
             size(this%state_storage(i, j)%data))
      end if
    end do
  end do

end subroutine checkpoint_reset
!> Allocate the host storage with the requested length and zero it.
!! @details Existing storage is released first, so the routine can be
!! called on an already initialised object. The length is recorded in the
!! `size` component.
!! @param[inout] this The host array.
!! @param[in] size Number of values to store.
subroutine host_array_init(this, size)
  class(host_array), intent(inout) :: this
  integer, intent(in) :: size

  ! Allocating an already allocated array is an error, so release first.
  call this%free()

  allocate(this%data(size))
  ! Record the length before it is used; the dummy argument `size` hides
  ! the intrinsic of the same name inside this routine.
  this%size = size
  call rzero(this%data, this%size)

end subroutine host_array_init
!> Release the host storage.
!! @details Safe to call on an object that holds no data. The recorded
!! length is reset as well, so it never describes storage that no longer
!! exists.
!! @param[inout] this The host array.
subroutine host_array_free(this)
  class(host_array), intent(inout) :: this

  if (allocated(this%data)) deallocate(this%data)
  this%size = 0

end subroutine host_array_free
!> Report whether the host storage currently exists.
!! @param[in] this The host array.
!! @return `.true.` if the `data` component is allocated.
pure function host_array_is_allocated(this) result(is_alloc)
  class(host_array), intent(in) :: this
  logical :: is_alloc

  is_alloc = allocated(this%data)

end function host_array_is_allocated
489end module simulation_checkpoint