Neko-TOP
A portable framework for high-order spectral element flow topology optimization.
Loading...
Searching...
No Matches
checkpoint.f90
Go to the documentation of this file.
1
34!
35module simulation_checkpoint
36 use num_types, only: rp, sp, dp
37 use case, only: case_t
38 use json_file_module, only: json_file
39 use json_utils, only: json_get, json_get_or_default
40 use scalar_scheme, only: scalar_scheme_t
41 use time_state, only: time_state_t
42 use chkp_output, only: chkp_output_t
43 use field, only: field_t
44 use field_list, only: field_list_t
45 use logger, only: neko_log, log_size, neko_log_debug
46 use mpi_f08, only: mpi_wtime
47 use utils, only: neko_error
48 use math, only: copy, rzero
49 use profiler, only: profiler_start_region, profiler_end_region
50 use neko_config, only: neko_bcknd_device
51 use device, only: device_memcpy, device_to_host, host_to_device
52 use registry, only: neko_registry
53 implicit none
54 private
55
!> Checkpoint handler that keeps copies of the simulation state so that an
!! earlier timestep can be restored later.
!!
!! A fixed number of states is held in host memory (`state_storage`); the
!! `chkp_output` component is the Neko checkpoint writer used for files.
!! The save/restore strategy is selected by `algorithm`.
!!
!! NOTE(review): declared `public` because the module defaults to `private`
!! and no other export statement is visible -- confirm against the original.
type, public :: simulation_checkpoint_t
  private

  ! ----------------------------------------------------------------------- !
  ! User parameters

  !> Whether checkpointing is active. `save`, `restore` and `reset` return
  !! immediately while this is false.
  logical :: enabled = .false.
  !> Name of the checkpointing algorithm ("linear" is the only one dispatched
  !! by `save` and `restore`).
  character(len=256) :: algorithm = "linear"
  !> File name handed to the checkpoint output.
  character(len=256) :: filename = "checkpoint"
  !> File format handed to the checkpoint output.
  character(len=8) :: fmt = "chkp"
  !> Number of checkpoint slots kept in host memory.
  integer :: n_saves_memory = 10
  !> If false, checkpoint files are deleted when the object is freed.
  logical :: keep_checkpoints = .false.

  ! Internal parameters

  !> Presumably the number of checkpoints written to disc; only ever reset to
  !! zero in this module -- maintained by the algorithm implementation.
  integer :: n_saves_disc = 0
  !> Number of timesteps recorded so far (incremented by every `save`).
  integer :: n_timesteps = 0
  !> Not referenced in this module beyond its default; presumably used by the
  !! algorithm implementation -- TODO confirm.
  integer :: first_valid_timestep = 2
  !> Reset to -1 by `reset` and `free`; presumably identifies the checkpoint
  !! currently loaded by the algorithm implementation, -1 meaning none.
  integer :: loaded_checkpoint = -1

  ! Field pointers

  !> Fields that make up one checkpoint: pressure, velocity components,
  !! scalars and any user-requested extra fields.
  type(field_list_t) :: state_list
  !> Host copies of the fields, indexed as (memory slot, field index).
  type(host_array), dimension(:,:), allocatable :: state_storage

  ! Structures to hold the checkpoint data

  !> Neko checkpoint output used for the checkpoint files.
  type(chkp_output_t) :: chkp_output

contains
  !> Generic constructor.
  generic, public :: init => init_from_json, init_from_components
  !> Initialise from a JSON parameter block.
  procedure, public, pass(this) :: init_from_json => &
       checkpoint_init_from_json
  !> Initialise from explicit arguments.
  procedure, public, pass(this) :: init_from_components => &
       checkpoint_init_from_components
  !> Release storage, optionally delete files and restore defaults.
  procedure, public, pass(this) :: free => checkpoint_free
  !> Forget all recorded timesteps and zero the in-memory storage.
  procedure, public, pass(this) :: reset => checkpoint_reset
  !> Record the current state.
  procedure, public, pass(this) :: save => checkpoint_save
  !> Restore the state of a given timestep.
  procedure, public, pass(this) :: restore => checkpoint_restore

  !> Copy the current fields into a memory slot.
  procedure, pass(this) :: save_data => checkpoint_save_data
  !> Copy a memory slot back into the fields.
  procedure, pass(this) :: load_data => checkpoint_load_data
end type simulation_checkpoint_t
111
!> Host-side buffer holding the values of one field for one checkpoint slot.
type :: host_array
  !> Stored values; unallocated until `init` has been called.
  real(kind=rp), dimension(:), allocatable :: data
  !> Number of entries requested at `init`; zero after `free`.
  integer :: size = 0
contains
  !> Allocate and zero the buffer.
  procedure, pass(this) :: init => host_array_init
  !> Release the buffer.
  procedure, pass(this) :: free => host_array_free
  !> Query whether the buffer is allocated.
  procedure, pass(this) :: is_allocated => host_array_is_allocated
end type host_array
120
121 ! ========================================================================== !
122 ! Module procedures for our algorithm implementations.
123
! Interfaces of the separate module procedures implementing the "linear"
! algorithm; their bodies are not defined in this module.
interface

  !> Save the current state using the linear algorithm.
  !! @param this The checkpoint handler.
  !! @param neko_case The simulation case whose state is recorded.
  module subroutine checkpoint_save_linear(this, neko_case)
    class(simulation_checkpoint_t), intent(inout) :: this
    class(case_t), intent(inout) :: neko_case
  end subroutine checkpoint_save_linear

  !> Restore the state of timestep `tstep` using the linear algorithm.
  !! @param this The checkpoint handler.
  !! @param neko_case The simulation case to restore into.
  !! @param tstep The timestep to restore.
  module subroutine checkpoint_restore_linear(this, neko_case, tstep)
    class(simulation_checkpoint_t), intent(inout) :: this
    class(case_t), target, intent(inout) :: neko_case
    integer, intent(in) :: tstep
  end subroutine checkpoint_restore_linear

end interface
138
139contains
140
141 ! ========================================================================== !
142 ! Initialization and deallocation
143
!> Initialise the checkpoint handler from a JSON parameter block.
!!
!! Keys read from `params` (with defaults): `enabled` (.false.),
!! `algorithm` ("linear"), `n_memory` (10), `filename` ("checkpoint"),
!! `format` ("chkp"), `keep_checkpoints` (.false.) and the optional list
!! `extra_fields` of field names looked up in the Neko registry.
!!
!! When `enabled` is false the handler is freed, so that re-initialising a
!! previously active handler with a disabled configuration really turns it
!! off, matching the free-then-init behaviour of `init_from_components`.
!!
!! @param this The checkpoint handler.
!! @param neko_case The simulation case to checkpoint.
!! @param params JSON object holding the checkpoint settings.
subroutine checkpoint_init_from_json(this, neko_case, params)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), target, intent(inout) :: neko_case
  type(json_file), target, intent(inout) :: params
  integer :: n_saves_memory
  character(len=:), allocatable :: filename, algorithm, fmt
  character(len=256), dimension(:), allocatable :: extra_field_names
  type(field_list_t) :: extra_fields
  type(field_t), pointer :: fi
  integer :: i
  logical :: enabled, keep_checkpoints, has_extra_fields

  call json_get_or_default(params, "enabled", enabled, .false.)
  if (.not. enabled) then
     ! Drop any state left over from an earlier initialisation.
     call this%free()
     return
  end if

  call json_get_or_default(params, "algorithm", algorithm, "linear")
  call json_get_or_default(params, "n_memory", n_saves_memory, 10)
  call json_get_or_default(params, "filename", filename, "checkpoint")
  call json_get_or_default(params, "format", fmt, "chkp")
  call json_get_or_default(params, "keep_checkpoints", keep_checkpoints, &
       .false.)

  ! Collect the user-requested extra fields from the registry.
  has_extra_fields = .false.
  if ("extra_fields" .in. params) then
     allocate(extra_field_names(0))
     call json_get(params, "extra_fields", extra_field_names)
     do i = 1, size(extra_field_names)
        fi => neko_registry%get_field(extra_field_names(i))
        call extra_fields%append(fi)
     end do
     has_extra_fields = size(extra_field_names) .gt. 0
  end if

  ! Only hand over the list when it was populated, so the callee never
  ! queries a list that was never initialised.
  if (has_extra_fields) then
     call this%init_from_components(neko_case, algorithm, n_saves_memory, &
          filename, fmt, keep_checkpoints, extra_fields)
  else
     call this%init_from_components(neko_case, algorithm, n_saves_memory, &
          filename, fmt, keep_checkpoints)
  end if
end subroutine checkpoint_init_from_json
180
!> Initialise the checkpoint handler from explicit components.
!!
!! The handler is freed first, then enabled. Any argument that is absent
!! leaves the corresponding default in place. The checkpointed state consists
!! of the fluid pressure and velocity components, every scalar field of the
!! case and, if given, the fields in `extra_fields`.
!!
!! @param this The checkpoint handler.
!! @param neko_case The simulation case to checkpoint.
!! @param algorithm Name of the checkpointing algorithm.
!! @param n_saves_memory Number of checkpoint slots kept in host memory.
!! @param filename File name handed to the checkpoint output.
!! @param fmt File format handed to the checkpoint output.
!! @param keep_checkpoints Keep the checkpoint files when freeing.
!! @param extra_fields Additional fields to include in every checkpoint.
subroutine checkpoint_init_from_components(this, neko_case, algorithm, &
     n_saves_memory, filename, fmt, keep_checkpoints, extra_fields)
  class(simulation_checkpoint_t), intent(inout), target :: this
  class(case_t), target, intent(inout) :: neko_case
  character(len=*), optional, intent(in) :: algorithm
  integer, optional, intent(in) :: n_saves_memory
  character(len=*), optional, intent(in) :: filename
  character(len=*), optional, intent(in) :: fmt
  logical, optional, intent(in) :: keep_checkpoints
  type(field_list_t), optional, intent(inout) :: extra_fields
  type(field_t), pointer :: fld
  character(len=LOG_SIZE) :: line
  integer :: k, n_fields

  ! Start from a clean, default-valued handler.
  call this%free()
  this%enabled = .true.

  ! Override the defaults with whatever the caller supplied.
  if (present(n_saves_memory)) this%n_saves_memory = n_saves_memory
  if (present(keep_checkpoints)) this%keep_checkpoints = keep_checkpoints
  if (present(algorithm)) this%algorithm = algorithm
  if (present(filename)) this%filename = filename
  if (present(fmt)) this%fmt = fmt

  ! Set up the Neko checkpoint writer.
  call this%chkp_output%init(neko_case%chkp, this%filename, fmt = this%fmt, &
       overwrite = .true.)

  ! Fluid state: pressure followed by the three velocity components.
  call this%state_list%append(neko_case%fluid%p)
  call this%state_list%append(neko_case%fluid%u)
  call this%state_list%append(neko_case%fluid%v)
  call this%state_list%append(neko_case%fluid%w)

  ! Scalar state, when the case carries scalars.
  if (allocated(neko_case%scalars)) then
     do k = 1, size(neko_case%scalars%scalar_fields)
        fld => neko_case%scalars%scalar_fields(k)%scalar%s
        call this%state_list%append(fld)
     end do
  end if

  ! User-requested extra fields.
  if (present(extra_fields)) then
     do k = 1, extra_fields%size()
        fld => extra_fields%get(k)
        call this%state_list%append(fld)
     end do
  end if

  ! One (initially unallocated) host buffer per memory slot and field.
  n_fields = this%state_list%size()
  allocate(this%state_storage(this%n_saves_memory, n_fields))

  ! Report the configuration.
  call neko_log%section("Checkpointing")

  write(line, '(A, A)') "Algorithm: ", trim(this%algorithm)
  call neko_log%message(trim(line))
  write(line, '(A,I0)') "Number of checkpoints in RAM: ", this%n_saves_memory
  call neko_log%message(trim(line))
  write(line, '(A, A)') "Checkpoint file name: ", trim(this%filename)
  call neko_log%message(trim(line))
  write(line, '(A, A)') "Checkpoint file format: ", trim(this%fmt)
  call neko_log%message(trim(line))

  if (this%keep_checkpoints) then
     call neko_log%message("Checkpoint files will be kept.")
  else
     call neko_log%message("Checkpoint files will be deleted.")
  end if

  call neko_log%message("Fields in checkpoint:", neko_log_debug)
  do k = 1, n_fields
     fld => this%state_list%get(k)
     call neko_log%message(" - " // trim(fld%name), neko_log_debug)
  end do

  call neko_log%end_section()

end subroutine checkpoint_init_from_components
262
!> Release all resources held by the checkpoint handler.
!!
!! Frees the in-memory checkpoints and the field list, deletes the
!! checkpoint files of all recorded timesteps unless `keep_checkpoints` is
!! set, and restores every parameter to its default value.
!!
!! @param this The checkpoint handler.
subroutine checkpoint_free(this)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer :: slot, ifield, step
  character(len=1024) :: path
  logical :: found
  integer :: ierr, io_unit

  ! In-memory checkpoints: release every buffer, then the table itself.
  if (allocated(this%state_storage)) then
     do slot = 1, this%n_saves_memory
        do ifield = 1, this%state_list%size()
           call this%state_storage(slot, ifield)%free()
        end do
     end do
     deallocate(this%state_storage)
  end if

  call this%state_list%free()

  ! Checkpoint files: walk back through the recorded timesteps and delete
  ! each file that exists. A file that cannot be opened is left in place.
  if (.not. this%keep_checkpoints) then
     do step = this%n_timesteps, 1, -1
        call this%chkp_output%set_counter(step)
        path = this%chkp_output%file_%get_fname()
        inquire(file = trim(path), exist = found)
        if (.not. found) cycle

        open(newunit = io_unit, file = trim(path), iostat = ierr, &
             status='old')
        if (ierr .eq. 0) close(io_unit, status = 'delete')
     end do
  end if

  ! Internal bookkeeping back to its initial values.
  this%loaded_checkpoint = -1
  this%first_valid_timestep = 2
  this%n_timesteps = 0
  this%n_saves_disc = 0

  ! User parameters back to their defaults.
  this%keep_checkpoints = .false.
  this%n_saves_memory = 10
  this%algorithm = "linear"
  this%fmt = "chkp"
  this%filename = "checkpoint"
  this%enabled = .false.

end subroutine checkpoint_free
311
312 ! ========================================================================== !
313 ! Saving and Restoring
314
!> Record the current simulation state as a new timestep.
!!
!! Does nothing when checkpointing is disabled. Otherwise the timestep
!! counter is incremented and the work is delegated to the implementation
!! of the selected algorithm; an unknown algorithm name is a fatal error.
!!
!! @param this The checkpoint handler.
!! @param neko_case The simulation case whose state is recorded.
subroutine checkpoint_save(this, neko_case)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), intent(inout) :: neko_case

  if (this%enabled) then
     call profiler_start_region("Checkpoint save")

     ! One more timestep on record.
     this%n_timesteps = this%n_timesteps + 1

     ! Dispatch on the algorithm name.
     if (this%algorithm .eq. "linear") then
        call checkpoint_save_linear(this, neko_case)
     else
        call neko_error("Unknown checkpoint algorithm: " // this%algorithm)
     end if

     call profiler_end_region("Checkpoint save")
  end if
end subroutine checkpoint_save
336
!> Restore the simulation state recorded for a given timestep.
!!
!! Does nothing when checkpointing is disabled. The requested timestep must
!! lie in [1, number of recorded timesteps], otherwise a fatal error is
!! raised. The work is delegated to the implementation of the selected
!! algorithm; an unknown algorithm name is a fatal error.
!!
!! @param this The checkpoint handler.
!! @param neko_case The simulation case to restore into.
!! @param tstep The timestep to restore.
subroutine checkpoint_restore(this, neko_case, tstep)
  class(simulation_checkpoint_t), intent(inout) :: this
  class(case_t), target, intent(inout) :: neko_case
  integer, intent(in) :: tstep
  character(len=256) :: text
  logical :: in_range

  if (this%enabled) then
     call profiler_start_region("Checkpoint restore")

     ! Only timesteps that were actually recorded can be restored.
     in_range = tstep .ge. 1 .and. tstep .le. this%n_timesteps
     if (.not. in_range) then
        write(text, '(A,I0,A,I0,A)') "Requested timestep ", tstep, &
             " is out of range [1, ", this%n_timesteps, "]"
        call neko_error(trim(text))
     end if

     ! Dispatch on the algorithm name.
     if (this%algorithm .eq. "linear") then
        call checkpoint_restore_linear(this, neko_case, tstep)
     else
        call neko_error("Unknown checkpoint algorithm: " // this%algorithm)
     end if

     call profiler_end_region("Checkpoint restore")
  end if
end subroutine checkpoint_restore
363
!> Copy the current values of all checkpointed fields into a memory slot.
!!
!! Buffers of the slot are allocated on first use. On the host backend the
!! field values are copied directly; otherwise they are copied from the
!! device, with the synchronisation flag set only for the last field.
!!
!! @param this The checkpoint handler.
!! @param index Memory slot to write, in [1, n_saves_memory].
subroutine checkpoint_save_data(this, index)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer, intent(in) :: index
  type(field_t), pointer :: fld
  integer :: k, n_fields
  character(len=1024) :: text
  logical :: valid_slot

  valid_slot = index .ge. 1 .and. index .le. this%n_saves_memory
  if (.not. valid_slot) then
     write(text, '(A,I0,A,I0,A)') "Checkpoint save index ", index, &
          " is out of range [1, ", this%n_saves_memory, "]"
     call neko_error(trim(text))
  end if

  n_fields = this%state_list%size()

  ! First pass: make sure every buffer of this slot exists.
  do k = 1, n_fields
     if (this%state_storage(index, k)%is_allocated()) cycle
     fld => this%state_list%get(k)
     call this%state_storage(index, k)%init(fld%size())
  end do

  ! Second pass: copy the field values into the slot.
  if (neko_bcknd_device .ne. 0) then
     do k = 1, n_fields
        fld => this%state_list%get(k)
        call device_memcpy(this%state_storage(index, k)%data, fld%x_d, &
             fld%size(), device_to_host, n_fields .eq. k)
     end do
  else
     do k = 1, n_fields
        fld => this%state_list%get(k)
        call copy(this%state_storage(index, k)%data, fld%x, fld%size())
     end do
  end if
end subroutine checkpoint_save_data
402
!> Copy the values stored in a memory slot back into the checkpointed fields.
!!
!! On the host backend the values are copied directly into the fields;
!! otherwise they are copied to the device, with the synchronisation flag set
!! only for the last field.
!!
!! A fatal error is raised if `index` is outside [1, n_saves_memory] or if
!! the slot was never written by `save_data` (its buffers are unallocated
!! and must not be passed on to the copy routines).
!!
!! @param this The checkpoint handler.
!! @param index Memory slot to read, in [1, n_saves_memory].
subroutine checkpoint_load_data(this, index)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer, intent(in) :: index
  type(field_t), pointer :: si
  character(len=1024) :: msg
  integer :: i

  if (index .lt. 1 .or. index .gt. this%n_saves_memory) then
     write(msg, '(A,I0,A,I0,A)') "Checkpoint save index ", index, &
          " is out of range [1, ", this%n_saves_memory, "]"
     call neko_error(trim(msg))
  end if

  ! Buffers are allocated lazily by save_data; refuse to read a slot that
  ! has never been filled.
  do i = 1, this%state_list%size()
     if (.not. this%state_storage(index, i)%is_allocated()) then
        write(msg, '(A,I0,A)') "Checkpoint slot ", index, &
             " has not been saved and cannot be loaded"
        call neko_error(trim(msg))
     end if
  end do

  ! Load the stored iterates back into the fields
  if (neko_bcknd_device .eq. 0) then
     do i = 1, this%state_list%size()
        si => this%state_list%get(i)
        call copy(si%x, this%state_storage(index, i)%data, si%size())
     end do
  else
     do i = 1, this%state_list%size()
        si => this%state_list%get(i)
        call device_memcpy(this%state_storage(index, i)%data, si%x_d, &
             si%size(), host_to_device, this%state_list%size() .eq. i)
     end do
  end if
end subroutine checkpoint_load_data
434
435 ! ========================================================================== !
436 ! Meta handling
437
!> Forget all recorded timesteps and zero the in-memory checkpoints.
!!
!! Does nothing when checkpointing is disabled. Buffers that were never
!! allocated (slots not yet written by `save_data`) are skipped, since an
!! unallocated array must not be passed to `rzero`.
!!
!! @param this The checkpoint handler.
subroutine checkpoint_reset(this)
  class(simulation_checkpoint_t), intent(inout) :: this
  integer :: i, j

  if (.not. this%enabled) return

  ! Reset our checkpoints
  this%loaded_checkpoint = -1
  this%n_saves_disc = 0
  this%n_timesteps = 0

  if (.not. allocated(this%state_storage)) return

  ! First index innermost to follow the column-major layout of the table.
  do j = 1, size(this%state_storage, 2)
     do i = 1, size(this%state_storage, 1)
        if (.not. this%state_storage(i, j)%is_allocated()) cycle
        call rzero(this%state_storage(i, j)%data, &
             this%state_storage(i, j)%size)
     end do
  end do

end subroutine checkpoint_reset
458
459 ! -------------------------------------------------------------------------- !
460 ! Host array routines
461
!> Allocate the buffer with `size` entries and set them to zero.
!!
!! Any previous allocation is released first.
!!
!! @param this The host buffer.
!! @param size Number of entries to allocate.
subroutine host_array_init(this, size)
  class(host_array), intent(inout) :: this
  integer, intent(in) :: size

  ! Drop whatever was held before.
  call this%free()

  allocate(this%data(size))
  this%size = size

  call rzero(this%data, this%size)

end subroutine host_array_init
472
!> Release the buffer and reset its recorded size to zero.
!!
!! Safe to call on a buffer that was never allocated.
!!
!! @param this The host buffer.
subroutine host_array_free(this)
  class(host_array), intent(inout) :: this

  if (allocated(this%data)) then
     deallocate(this%data)
  end if
  this%size = 0

end subroutine host_array_free
480
!> Report whether the buffer currently holds an allocation.
!!
!! @param this The host buffer.
!! @return True if `data` is allocated.
pure function host_array_is_allocated(this) result(is_alloc)
  class(host_array), intent(in) :: this
  logical :: is_alloc

  if (allocated(this%data)) then
     is_alloc = .true.
  else
     is_alloc = .false.
  end if

end function host_array_is_allocated
488
489end module simulation_checkpoint