Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1
33
34submodule(mma) mma_cpu
35 use lapack_interfaces, only: dgesv
36 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
37 use comm, only: neko_comm, pe_rank, mpi_real_precision
38 use math, only: neko_eps
39 use profiler, only: profiler_start_region, profiler_end_region
40
41 implicit none
42
43contains
44
45 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
46 ! ----------------------------------------------------- !
47 ! Update the design variable x by solving the convex !
48 ! approximation of the problem. !
49 ! !
50 ! This subroutine is called in each iteration of the !
51 ! optimization loop !
52 ! ----------------------------------------------------- !
53 class(mma_t), intent(inout) :: this
54 integer, intent(in) :: iter
55 real(kind=rp), dimension(this%n), intent(inout) :: x
56 real(kind=rp), dimension(this%n), intent(in) :: df0dx
57 real(kind=rp), dimension(this%m), intent(in) :: fval
58 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
59
60 if (.not. this%is_initialized) then
61 call neko_error("The MMA object is not initialized.")
62 end if
63
64 call profiler_start_region("MMA update")
65
66 ! generate a convex approximation of the problem
67 call profiler_start_region("MMA gensub")
68 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
69 call profiler_end_region("MMA gensub")
70
71 !solve the approximation problem using interior point method
72 call profiler_start_region("MMA subsolve")
73 if (this%subsolver .eq. "dip") then
74 call mma_subsolve_dip_cpu(this, x)
75 else
76 call mma_subsolve_dpip_cpu(this, x)
77 end if
78 call profiler_end_region("MMA subsolve")
79
80 call profiler_end_region("MMA update")
81
82 this%is_updated = .true.
83 end subroutine mma_update_cpu
84
86 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
87 ! ----------------------------------------------------- !
88 ! Compute the KKT condition right hand side for a given !
89 ! designx x and set the max and norm values of the !
90 ! residue of KKT system to this%residumax and !
91 ! this%residunorm. !
92 ! !
93 ! The left hand sides of the KKT conditions are computed!
94 ! for the following nonlinear programming problem: !
95 ! Minimize f_0(x) + a_0*z + !
96 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
97 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
98 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
99 ! z >= 0, y_i >= 0, i = 1,...,m !
100 ! !
101 ! !
102 ! Note that before calling this function, the function !
103 ! values (f0val, fval, dfdx, ...) should be updated !
104 ! using the new x values. !
105 ! ----------------------------------------------------- !
106 class(mma_t), intent(inout) :: this
107 real(kind=rp), dimension(this%n), intent(in) :: x
108 real(kind=rp), dimension(this%n), intent(in) :: df0dx
109 real(kind=rp), dimension(this%m), intent(in) :: fval
110 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
111
112 call profiler_start_region("MMA KKT computation")
113
114 if (this%subsolver .eq. "dip") then
115 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
116 else
117 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
118 end if
119
120 call profiler_end_region("MMA KKT computation")
121 end subroutine mma_kkt_cpu
122
124 ! point method (dpip) subsolve of MMA algorithm.
125 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
126 ! ----------------------------------------------------- !
127 ! Compute the KKT condition right hand side for a given !
128 ! designx x and set the max and norm values of the !
129 ! residue of KKT system to this%residumax and !
130 ! this%residunorm. !
131 ! !
132 ! The left hand sides of the KKT conditions are computed!
133 ! for the following nonlinear programming problem: !
134 ! Minimize f_0(x) + a_0*z + !
135 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
136 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
137 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
138 ! z >= 0, y_i >= 0, i = 1,...,m !
139 ! !
140 ! !
141 ! Note that before calling this function, the function !
142 ! values (f0val, fval, dfdx, ...) should be updated !
143 ! using the new x values. !
144 ! ----------------------------------------------------- !
145 class(mma_t), intent(inout) :: this
146 real(kind=rp), dimension(this%n), intent(in) :: x
147 real(kind=rp), dimension(this%n), intent(in) :: df0dx
148 real(kind=rp), dimension(this%m), intent(in) :: fval
149 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
150
151 real(kind=rp) :: rez, rezeta
152 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
153 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
154 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
155
156 real(kind=rp), dimension(4*this%m+2) :: residual_small
157 integer :: ierr
158 real(kind=rp) :: re_sq_norm
159
160 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
161 - this%xsi%x + this%eta%x
162 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
163 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
164
165 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
166 rexsi = this%xsi%x * (x - this%xmin%x)
167 reeta = this%eta%x * (this%xmax%x - x)
168 remu = this%mu%x * this%y%x
169 rezeta = this%zeta * this%z
170 res = this%lambda%x * this%s%x
171
172 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
173 residual_small = [rey, rez, relambda, remu, rezeta, res]
174
175 this%residumax = maxval(abs(residual))
176 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
177
178 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
179 mpi_real_precision, mpi_max, neko_comm, ierr)
180
181 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
182 mpi_real_precision, mpi_sum, neko_comm, ierr)
183
184 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
185 end subroutine mma_dpip_kkt_cpu
186
188 ! point method (dip) subsolve of MMA algorithm.
189 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
190 ! ----------------------------------------------------- !
191 ! Compute the KKT condition right hand side for a given !
192 ! designx x and set the max and norm values of the !
193 ! residue of KKT system to this%residumax and !
194 ! this%residunorm. !
195 ! !
196 ! The left hand sides of the KKT conditions are computed!
197 ! for the following nonlinear programming problem: !
198 ! Minimize f_0(x) + a_0*z + !
199 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
200 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
201 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
202 ! z >= 0, y_i >= 0, i = 1,...,m !
203 ! !
204 ! !
205 ! Note that before calling this function, the function !
206 ! values (f0val, fval, dfdx, ...) should be updated !
207 ! using the new x values. !
208 ! ----------------------------------------------------- !
209 class(mma_t), intent(inout) :: this
210 real(kind=rp), dimension(this%n), intent(in) :: x
211 real(kind=rp), dimension(this%n), intent(in) :: df0dx
212 real(kind=rp), dimension(this%m), intent(in) :: fval
213 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
214
215 real(kind=rp), dimension(this%m) :: relambda, remu
216 real(kind=rp), dimension(2*this%m) :: residual
217
218
219 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
220 ! Compute residual for mu (eta in the paper)
221 remu = this%lambda%x * this%mu%x
222
223 residual = abs([relambda, remu])
224 this%residumax = maxval(residual)
225 this%residunorm = norm2(residual)
226
227 end subroutine mma_dip_kkt_cpu
228
229 !============================================================================!
230 ! private internal subroutines
231
233 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
234 ! ----------------------------------------------------- !
235 ! Generate the approximation sub problem by computing !
236 ! the lower and upper asymtotes and the other necessary !
237 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
238 ! ----------------------------------------------------- !
239 class(mma_t), intent(inout) :: this
240 real(kind=rp), dimension(this%n), intent(in) :: x
241 real(kind=rp), dimension(this%n), intent(in) :: df0dx
242 real(kind=rp), dimension(this%m), intent(in) :: fval
243 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
244 integer, intent(in) :: iter
245 integer :: i, j, ierr
246 real(kind=rp), dimension(this%n) :: xmin_eff, xmax_eff
247 real(kind=rp), dimension(this%n) :: x_diff
248 real(kind=rp) :: asy_factor
249
250 xmin_eff = this%xmin%x
251 xmax_eff = this%xmax%x
252 if (this%move_limit .gt. 0.0_rp) then
253 xmin_eff = max(xmin_eff, x - this%move_limit)
254 xmax_eff = min(xmax_eff, x + this%move_limit)
255 end if
256
257 x_diff = max(xmax_eff - xmin_eff, 1.0e-5_rp)
258
259 ! ------------------------------------------------------------------------ !
260 ! Setup the current asymptotes
261 associate(low => this%low%x, upp => this%upp%x, &
262 x_1 => this%xold1%x, x_2 => this%xold2%x)
263
264 if (iter .lt. 3) then
265 ! Initialize the lower and upper asymptotes
266 low = x - this%asyinit * x_diff
267 upp = x + this%asyinit * x_diff
268 else
269 do j = 1, this%n
270 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
271 asy_factor = this%asydecr
272 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
273 asy_factor = this%asyincr
274 else
275 asy_factor = 1.0_rp
276 end if
277
278 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
279 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
280 end do
281
282
283 ! Setting a minimum and maximum for the low and upp
284 ! asymptotes (eq3.9)
285 low = max(low, x - 10.0_rp * x_diff)
286 low = min(low, x - 0.01_rp * x_diff)
287
288 upp = min(upp, x + 10.0_rp * x_diff)
289 upp = max(upp, x + 0.01_rp * x_diff)
290 end if
291
292 end associate
293
294
295 ! ------------------------------------------------------------------------ !
296 ! Set the the bounds and coefficients for the approximation
297 ! the move bounds (alpha and beta) are slightly more restrictive
298 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
299 ! also check
300 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
301 ! eq (2.8) and (2.9)
302
303 associate(alpha => this%alpha%x, beta => this%beta%x, &
304 xmin => xmin_eff, xmax => xmax_eff, &
305 low => this%low%x, upp => this%upp%x, x => x)
306
307 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
308 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
309 end associate
310
311 ! ------------------------------------------------------------------------ !
312 ! Calculate p0j, q0j, pij, qij
313 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
314
315 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
316 pij => this%pij%x, qij => this%qij%x, &
317 low => this%low%x, upp => this%upp%x)
318
319 p0j = ( &
320 1.001_rp * max(df0dx, 0.0_rp) &
321 + 0.001_rp * max(-df0dx, 0.0_rp) &
322 + 0.00001_rp / max(x_diff, 0.00001_rp) &
323 ) * (upp - x)**2
324
325 q0j = ( &
326 0.001_rp * max(df0dx, 0.0_rp) &
327 + 1.001_rp * max(-df0dx, 0.0_rp) &
328 + 0.00001_rp / max(x_diff, 0.00001_rp)&
329 ) * (x - low)**2
330
331 do j = 1, this%n
332 do i = 1, this%m
333 pij(i, j) = ( &
334 1.001_rp * max(dfdx(i, j), 0.0_rp) &
335 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
336 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
337 ) * (upp(j) - x(j))**2
338
339 qij(i, j) = ( &
340 0.001_rp * max(dfdx(i, j), 0.0_rp) &
341 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
342 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
343 ) * (x(j) - low(j))**2
344 end do
345 end do
346
347 end associate
348
349 ! ------------------------------------------------------------------------ !
350 ! Computing bi as defined in page 5
351
352 associate(bi => this%bi%x, &
353 pij => this%pij%x, qij => this%qij%x, &
354 low => this%low%x, upp => this%upp%x)
355
356 bi = 0.0_rp
357 do i = 1, this%m
358 do j = 1, this%n
359 bi(i) = bi(i) &
360 + pij(i, j) / (upp(j) - x(j)) &
361 + qij(i, j) / (x(j) - low(j))
362 end do
363 end do
364
365 call mpi_allreduce(mpi_in_place, bi, this%m, &
366 mpi_real_precision, mpi_sum, neko_comm, ierr)
367 bi = bi - fval
368
369 end associate
370 end subroutine mma_gensub_cpu
371
374 subroutine mma_subsolve_dpip_cpu(this, designx)
375 ! ------------------------------------------------------- !
376 ! Dual-primal interior point method using Newton's step !
377 ! to solve MMA sub problem. !
378 ! A Backtracking Line Search approach is used to compute !
379 ! the step size; starting with the full Newton's step !
380 ! (delta = 1) and dividing by 2 until we have a step size !
381 ! that leads to a feasible point while ensuring a !
382 ! decrease in the residue. !
383 ! ------------------------------------------------------- !
384
385 class(mma_t), intent(inout) :: this
386 real(kind=rp), dimension(this%n), intent(inout) :: designx
387 ! Note that there is a local dummy "x" in this subroutine, thus, we call
388 ! the current design "designx" instead of just "x"
389 integer :: i, j, k, iter, itto, ierr
390 real(kind=rp) :: epsi, residual_max, residual_norm, &
391 z, zeta, rez, rezeta, &
392 delz, dz, dzeta, &
393 steg, zold, zetaold, new_residual
394 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
395 rey, relambda, remu, res, &
396 dely, dellambda, &
397 dy, dlambda, ds, dmu, &
398 yold, lambdaold, sold, muold
399 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
400 rex, rexsi, reeta, &
401 delx, diagx, dx, dxsi, deta, &
402 xold, xsiold, etaold
403 real(kind=rp), dimension(4*this%m + 2) :: residual_small
404 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
405 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
406
407 real(kind=rp), dimension(this%m, this%n) :: gg
408 real(kind=rp), dimension(this%m+1) :: bb
409 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
410 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
411
412 ! using DGESV in lapack to solve
413 ! the linear system which needs the following parameters
414 integer :: info
415 integer, dimension(this%m+1) :: ipiv
416
417 ! Parameters for global communication
418 real(kind=rp) :: re_sq_norm
419 real(kind=rp) :: minimal_epsilon
420
421 ! ------------------------------------------------------------------------ !
422 ! initial value for the parameters in the subsolve based on
423 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
424
425 epsi = 1.0_rp !100
426 x = 0.5_rp * (this%alpha%x + this%beta%x)
427 y = 1.0_rp
428 z = 1.0_rp
429 zeta = 1.0_rp
430 lambda = 1.0_rp
431 s = 1.0_rp
432 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
433 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
434 mu = max(1.0_rp, 0.5_rp * this%c%x)
435
436 ! ------------------------------------------------------------------------ !
437 ! Computing the minimal epsilon and choose the most conservative one
438
439 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
440 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
441 mpi_real_precision, mpi_min, neko_comm, ierr)
442
443 ! ------------------------------------------------------------------------ !
444 ! The main loop of the dual-primal interior point method.
445
446 do while (epsi .gt. minimal_epsilon)
447
448 ! --------------------------------------------------------------------- !
449 ! Calculating residuals based on
450 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
451 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
452
453 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
454 pij => this%pij%x, qij => this%qij%x, &
455 low => this%low%x, upp => this%upp%x, &
456 alpha => this%alpha%x, beta => this%beta%x, &
457 c => this%c%x, d => this%d%x, &
458 a0 => this%a0, a => this%a%x)
459
460 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
461 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
462 - xsi + eta
463
464 rey = c + d * y - lambda - mu
465 rez = a0 - zeta - dot_product(lambda, a)
466
467 relambda = 0.0_rp
468 do i = 1, this%m
469 do j = 1, this%n
470 ! Accumulate sums for relambda (the term gi(x))
471 relambda(i) = relambda(i) &
472 + pij(i, j) / (upp(j) - x(j)) &
473 + qij(i, j) / (x(j) - low(j))
474 end do
475 end do
476
477 end associate
478
479 ! --------------------------------------------------------------------- !
480 ! Computing the norm of the residuals
481
482 ! Complete the computations of lambda residuals
483 call mpi_allreduce(mpi_in_place, relambda, this%m, &
484 mpi_real_precision, mpi_sum, neko_comm, ierr)
485 relambda = relambda - this%a%x*z - y + s - this%bi%x
486
487 rexsi = xsi * (x - this%alpha%x) - epsi
488 reeta = eta * (this%beta%x - x) - epsi
489 remu = mu * y - epsi
490 rezeta = zeta * z - epsi
491 res = lambda * s - epsi
492
493 ! Setup vectors of residuals and their norms
494 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
495 residual_small = [rey, rez, relambda, remu, rezeta, res]
496
497 residual_max = maxval(abs(residual))
498 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
499
500 call mpi_allreduce(mpi_in_place, residual_max, 1, &
501 mpi_real_precision, mpi_max, neko_comm, ierr)
502
503 call mpi_allreduce(mpi_in_place, re_sq_norm, &
504 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
505
506 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
507
508 ! --------------------------------------------------------------------- !
509 ! Internal loop
510
511 do iter = 1, this%max_iter
512
513 !Check the condition
514 if (residual_max .lt. epsi) exit
515
516 delx = 0.0_rp
517 do j = 1, this%n
518 do i = 1, this%m
519 delx(j) = delx(j) &
520 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
521 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
522 end do
523 end do
524
525 delx = delx &
526 + this%p0j%x / (this%upp%x - x)**2 &
527 - this%q0j%x / (x - this%low%x)**2 &
528 - epsi / (x - this%alpha%x) &
529 + epsi / (this%beta%x - x)
530
531 dely = this%c%x + this%d%x * y - lambda - epsi / y
532 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
533
534 ! Accumulate sums for dellambda (the term gi(x))
535 dellambda = 0.0_rp
536 do i = 1, this%m
537 do j = 1, this%n
538 dellambda(i) = dellambda(i) &
539 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
540 + this%qij%x(i, j) / (x(j) - this%low%x(j))
541 end do
542 end do
543
544 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
545 mpi_real_precision, mpi_sum, neko_comm, ierr)
546
547 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
548
549 do i = 1, this%m
550 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
551 - this%qij%x(i,:) / (x - this%low%x)**2
552 end do
553
554 diagx = &
555 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
556 / (this%upp%x - x)**3 &
557 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
558 / (x - this%low%x)**3
559
560 diagx = 2.0_rp * diagx &
561 + xsi / (x - this%alpha%x) &
562 + eta / (this%beta%x - x)
563
564
565 !Here we only consider the case m<n in the matlab code
566 !assembling the right hand side matrix based on eq(5.20)
567 ! bb = [dellambda + dely/(this%d%x + &
568 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
569
570 !--------------------------------------------------------------------!
571 ! for MPI computation of bb
572
573 bb = 0.0_rp
574 do i = 1, this%m
575 do j = 1, this%n
576 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
577 end do
578 end do
579
580 call mpi_allreduce(mpi_in_place, bb, this%m, &
581 mpi_real_precision, mpi_sum, neko_comm, ierr)
582
583 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
584 bb(this%m + 1) = delz
585
586 !--------------------------------------------------------------------!
587 ! assembling the coefficients matrix AA based on eq(5.20)
588 ! AA(1:this%m,1:this%m) = &
589 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
590 ! !update diag(AA)
591 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
592 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
593
594 aa = 0.0_rp
595 ! Direct computation of the matrix multiplication
596 ! (for better performance)
597 do i = 1, this%m
598 do j = 1, this%m
599 ! Compute the (i, j) element of AA
600 do k = 1, this%n !this n is global
601 aa(i, j) = aa(i, j) &
602 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
603 end do
604 end do
605 end do
606
607 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
608
609 call mpi_allreduce(mpi_in_place, aa_buffer, &
610 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
611
612 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
613
614 do i = 1, this%m
615 ! update the diag AA
616 aa(i, i) = aa(i, i) &
617 + s(i) / lambda(i) &
618 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
619 end do
620
621 aa(1:this%m, this%m+1) = this%a%x
622 aa(this%m+1, 1:this%m) = this%a%x
623 aa(this%m+1, this%m+1) = - zeta/z
624
625 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
626
627 if (info .ne. 0) then
628 call neko_error("DGESV failed to solve the linear system in " // &
629 "mma_subsolve_dpip.")
630 end if
631
632
633 dlambda = bb(1:this%m)
634 dz = bb(this%m + 1)
635
636 ! based on eq(5.19)
637 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
638 dy = (-dely + dlambda) / (this%d%x + mu / y)
639
640 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
641 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
642 dmu = -mu + (epsi - mu * dy) / y
643 dzeta = -zeta + (epsi - zeta * dz) / z
644 ds = -s + (epsi - dlambda * s) / lambda
645
646 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
647 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
648
649 steg = 1.0_rp / maxval([ &
650 1.0_rp, &
651 -1.01_rp * dxx / xx, &
652 -1.01_rp * dx / (x - this%alpha%x), &
653 1.01_rp * dx / (this%beta%x - x) &
654 ])
655
656 ! Save the old values
657 xold = x
658 yold = y
659 zold = z
660 lambdaold = lambda
661 xsiold = xsi
662 etaold = eta
663 muold = mu
664 zetaold = zeta
665 sold = s
666
667 new_residual = 2.0_rp * residual_norm
668
669 ! Share the new_residual and steg values
670 call mpi_allreduce(mpi_in_place, steg, 1, &
671 mpi_real_precision, mpi_min, neko_comm, ierr)
672 call mpi_allreduce(mpi_in_place, new_residual, 1, &
673 mpi_real_precision, mpi_min, neko_comm, ierr)
674
675 ! The innermost loop to determine the suitable step length
676 ! using the Backtracking Line Search approach
677 itto = 0
678 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
679 itto = itto + 1
680
681 ! update the variables
682 x = xold + steg*dx
683 y = yold + steg*dy
684 z = zold + steg*dz
685
686 lambda = lambdaold + steg*dlambda
687 xsi = xsiold + steg*dxsi
688 eta = etaold + steg*deta
689 mu = muold + steg*dmu
690 zeta = zetaold + steg*dzeta
691 s = sold + steg*ds
692
693 ! Recompute the new_residual to see if this stepsize improves
694 ! the residue
695 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
696 / (this%upp%x - x)**2 &
697 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
698 / (x - this%low%x)**2 &
699 - xsi + eta
700
701 rey = this%c%x + this%d%x*y - lambda - mu
702 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
703
704 ! Accumulate sums for relambda (the term gi(x))
705 relambda = 0.0_rp
706 do i = 1, this%m
707 do j = 1, this%n
708 relambda(i) = relambda(i) &
709 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
710 + this%qij%x(i, j) / (x(j) - this%low%x(j))
711 end do
712 end do
713
714 call mpi_allreduce(mpi_in_place, relambda, this%m, &
715 mpi_real_precision, mpi_sum, neko_comm, ierr)
716
717 relambda = relambda - this%a%x*z - y + s - this%bi%x
718
719 rexsi = xsi * (x - this%alpha%x) - epsi
720 reeta = eta * (this%beta%x - x) - epsi
721 remu = mu * y - epsi
722 rezeta = zeta * z - epsi
723 res = lambda * s - epsi
724
725 ! Compute squared norms for the residuals
726 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
727 call mpi_allreduce(mpi_in_place, re_sq_norm, &
728 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
729
730 residual_small = [rey, rez, relambda, remu, rezeta, res]
731 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
732 call mpi_allreduce(mpi_in_place, new_residual, 1, &
733 mpi_real_precision, mpi_min, neko_comm, ierr)
734
735 steg = steg / 2.0_rp
736 end do
737 steg = 2.0_rp * steg ! Correction for the final division by 2
738
739 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
740
741 ! Update the maximum and norm of the residuals
742 residual_norm = new_residual
743 residual_max = maxval(abs(residual))
744 call mpi_allreduce(mpi_in_place, residual_max, 1, &
745 mpi_real_precision, mpi_max, neko_comm, ierr)
746 end do
747
748 epsi = 0.1_rp * epsi
749 end do
750
751 ! Save the new designx
752 this%xold2%x = this%xold1%x
753 this%xold1%x = designx
754 designx = x
755
756 !update the parameters of the MMA object nesessary to compute KKT residual
757 this%y%x = y
758 this%z = z
759 this%lambda%x = lambda
760 this%zeta = zeta
761 this%xsi%x = xsi
762 this%eta%x = eta
763 this%mu%x = mu
764 this%s%x = s
765
766 end subroutine mma_subsolve_dpip_cpu
767
770 subroutine mma_subsolve_dip_cpu(this, designx)
771 ! ------------------------------------------------------------------------ !
772 ! -------------------------------Dual Solver------------------------------ !
773 ! ------------------------------------------------------------------------ !
774 ! This implementation is based on: !
775 ! https://doi.org/10.1007/s00158-012-0869-2 !
776 ! Definition of the Lagrangian function: !
777 ! (Note that the equation is slightly different with d(i)=1 and a quadratic!
778 ! term for z. This is done to ensure that we have quadratic terms for both !
779 ! y and z.) !
780 ! !
781 ! L(x, y, z, λ) = !
782 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
783 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
784 ! - sum_{i=1}^{m} λ_i * b_i !
785 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * y_i^2 ] !
786 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z + 0.5 * z^2 !
787 ! !
788 ! Breakdown of terms: !
789 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
790 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
791 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
792 ! !
793 ! Optimization problem if λ is given: !
794 ! !
795 ! Minimize L(x, y, z, λ) !
796 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
797 ! !
798 ! Since the problem is separable: !
799 ! Ψ(λ) = !
800 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
801 ! + min_z {L_z(z, λ) | z ≥ 0} !
802 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
803 ! !
804 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
805 ! !
806 ! ------------------------------------------------------------------------ !
807
808 class(mma_t), intent(inout) :: this
809 real(kind=rp), dimension(this%n), intent(inout) :: designx
810 ! Note that there is a local dummy "x" in this subroutine, thus, we call
811 ! the current design "designx" instead of just "x"
812 integer :: i, j, k, iter, ierr
813 real(kind=rp) :: epsi, residual_max, z, steg
814 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
815 relambda, remu, dlambda, dmu, gradlambda
816 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
817
818 ! To compute the Hessian based on eq(13)
819 ! https://doi.org/10.1007/s00158-012-0869-2
820
821 ! inverse of a diag matrix:
822 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
823 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
824 real(kind=rp), dimension(this%m,this%m) :: hess
825 real(kind=rp) :: hesstrace
826
827
828 ! using DGESV in lapack to solve
829 ! the linear system which needs the following parameters
830 integer :: info
831 integer, dimension(this%m+1) :: ipiv
832
833 ! Parameters for global communication
834 real(kind=rp) :: minimal_epsilon
835
836 ! ------------------------------------------------------------------------ !
837 ! initial value for the parameters in the subsolve based on
838 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
839
840 epsi = 1.0_rp !100
841 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
842 y = 1.0_rp
843 z = 0.0_rp
844 lambda = max(1.0_rp, 0.5_rp * this%c%x)
845 mu = 1.0_rp !this parameter is eta in Niel's paper
846 ! note that mu in the paper translates to epsi in the code following the
847 ! same style as the Cpp code by Neils
848
849 ! ------------------------------------------------------------------------ !
850 ! Computing the minimal epsilon and choose the most conservative one
851
852 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
853 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
854 mpi_real_precision, mpi_min, neko_comm, ierr)
855
856 ! ------------------------------------------------------------------------ !
857 ! The main loop of the dual-primal interior point method.
858
859 do while (epsi .gt. minimal_epsilon)
860
861 ! --------------------------------------------------------------------- !
862 ! Calculating residuals based on
863 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
864 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
865
866 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
867 pij => this%pij%x, qij => this%qij%x, &
868 low => this%low%x, upp => this%upp%x, &
869 alpha => this%alpha%x, beta => this%beta%x, &
870 c => this%c%x, d => this%d%x, &
871 a0 => this%a0, a => this%a%x, &
872 bi => this%bi%x)
873 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
874 ! the initial value of λ
875
876 ! Comput the value of y that minimizes L_y for the current λ
877 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * y_i^2 ])
878 ! dL_y/dy =0 => y= (λ_i - c_i), ensure y>=0
879 y = max(0.0_rp, lambda - c)
880
881 ! Comput the value of z that minimizes L_z for the current λ
882 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z + 0.5 * z^2)
883 ! ensure z>=0
884 z = max(0.0_rp,dot_product(lambda, a) - a0)
885
886 ! Comput the value of x that minimizes L_x for the current λ
887 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} ��_i *
888 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
889 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
890 pjlambda = (p0j + matmul(transpose(pij), lambda))
891 qjlambda = (q0j + matmul(transpose(qij), lambda))
892 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
893 (sqrt(pjlambda) + sqrt(qjlambda))
894
895 ! Ensure that x is feasible (alpha<=x<=beta)
896 x = merge(alpha, x, x .lt. alpha)
897 x = merge(beta, x, x .gt. beta)
898
899 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
900 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
901 matmul(qij, 1.0_rp / (x - low))
902
903 ! Global comminucation for relambda values
904 call mpi_allreduce(mpi_in_place, relambda, this%m, &
905 mpi_real_precision, mpi_sum, neko_comm, ierr)
906 relambda = relambda - bi - y - a * z + mu
907
908 ! Compute residual for mu (eta in the paper)
909 remu = mu * lambda - epsi
910
911 residual_max = maxval(abs([relambda, remu]))
912 call mpi_allreduce(mpi_in_place, residual_max, 1, &
913 mpi_real_precision, mpi_max, neko_comm, ierr)
914
915 ! ------------------------------------------------------------------- !
916 ! Internal loop
917 do iter = 1, this%max_iter
918
919 !Check the condition
920 if (residual_max .lt. epsi) exit
921
922 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
923
924 gradlambda = matmul(pij, 1.0_rp / (upp - x)) + &
925 matmul(qij, 1.0_rp/(x - low))
926
927 ! Global comminucation for gradlambda values
928 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
929 mpi_real_precision, mpi_sum, neko_comm, ierr)
930 gradlambda = gradlambda - bi - y - a * z
931
932 ! Update gradlambda as the right hand side for Newton's method(eq10)
933 gradlambda = - gradlambda - epsi / lambda
934
935 ! Computing the Hessian as in equation (13) in
936 !! https://doi.org/10.1007/s00158-012-0869-2
937
938 !--------------contributions of x terms to Hess--------------------!
939 ljjxinv= - 1.0_rp / ( (2.0_rp * pjlambda/(upp - x)**3) + &
940 (2.0_rp * qjlambda/(x - low)**3))
941
942
943 ! Remove the sensitivity for the active primal constraints
944 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < neko_eps)
945 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < neko_eps)
946
947 do i = 1, this%m
948 hijx(i,:) = pij(i,:) / (upp - x)**2 &
949 - qij(i,:) / (x - low)**2
950 end do
951
952 hess = 0.0_rp
953 ! Direct computation of the matrix multiplication
954 ! (for better performance)
955 do i = 1, this%m
956 do j = 1, this%m
957 ! Compute the (i, j) element of AA
958 do k = 1, this%n !this n is global
959 hess(i, j) = hess(i, j) &
960 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
961 end do
962 end do
963 end do
964
965 call mpi_allreduce(mpi_in_place, hess, &
966 this%m * this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
967
968 !---------------contributions of z terms to Hess-------------------!
969 ! Only for inactive constraint, we consider contributions to Hess
970 ! based on the cpp code by Niels.
971 if (dot_product(lambda, a) .gt. 0.0_rp) then
972 do i = 1, this%m
973 do j = 1, this%m
974 hess(i, j) = hess(i, j) - a(i) * a(j)
975 end do
976 end do
977 end if
978
979 !---------------contributions of y terms to Hess-------------------!
980 ! Only for inactive constraint, we consider contributions to Hess.
981 do i = 1, this%m
982 if (y(i) .gt. 0.0_rp) then
983 hess(i, i) = hess(i, i) - 1.0_rp
984 end if
985 ! Based on eq(10), note the term (-\Omega \Lambda)
986 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
987 end do
988
989 ! Improve the robustness by stablizing the Hess using
990 ! Levenberg-Marquardt algorithm (heuristically)
991 hesstrace = 0.0_rp
992 do i=1, this%m
993 hesstrace = hesstrace + hess(i, i)
994 end do
995 do i=1, this%m
996 hess(i,i) = hess(i, i) - &
997 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
998 end do
999
1000 call dgesv(this%m , 1, hess, this%m , ipiv, &
1001 gradlambda, this%m, info)
1002
1003 if (info .ne. 0) then
1004 call neko_error("DGESV failed to solve the linear system in " // &
1005 "mma_subsolve_dip.")
1006 end if
1007 dlambda = gradlambda
1008
1009 ! based on eq(11) for delta eta
1010 dmu = -mu + epsi / lambda - dlambda * mu / lambda
1011
1012 ! Compute the stepsize and update lambda and mu (eta in the paper)
1013
1014 steg = 1.005_rp
1015 do i = 1, this%m
1016 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1017 steg < -1.01_rp * dlambda(i) / lambda(i))
1018 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1019 steg, steg < -1.01_rp * dmu(i) / mu(i))
1020 end do
1021
1022 steg = 1.0_rp / steg
1023
1024 lambda = lambda + steg*dlambda
1025 mu = mu + steg*dmu
1026
1027 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1028 ! the updated values of λ
1029
1030 ! Comput the value of y that minimizes L_y for the current λ
1031 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * y_i^2 ])
1032 ! dL_y/dy =0 => y= (λ_i - c_i), ensure y>=0
1033 y = max(0.0_rp, lambda - c)
1034
1035
1036 ! Comput the value of z that minimizes L_z for the current λ
1037 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z + 0.5 * z^2)
1038 ! ensure z>=0
1039 z = max(0.0_rp,dot_product(lambda, a) - a0)
1040
1041 ! Comput the value of x that minimizes L_x for the current λ
1042 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1043 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1044 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1045 pjlambda = (p0j + matmul(transpose(pij), lambda))
1046 qjlambda = (q0j + matmul(transpose(qij), lambda))
1047 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1048 (sqrt(pjlambda) + sqrt(qjlambda))
1049
1050 ! Ensure that x is feasible (alpha<=x<=beta)
1051 x = merge(alpha, x, x .lt. alpha)
1052 x = merge(beta, x, x .gt. beta)
1053
1054 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1055 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
1056 matmul(qij, 1.0_rp / (x - low))
1057 ! Global comminucation for relambda values
1058 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1059 mpi_real_precision, mpi_sum, neko_comm, ierr)
1060 relambda = relambda - bi - y - a * z + mu
1061
1062 ! Compute residual for mu (eta in the paper)
1063 remu = mu * lambda - epsi
1064
1065 residual_max = maxval(abs([relambda, remu]))
1066 call mpi_allreduce(mpi_in_place, residual_max, 1, &
1067 mpi_real_precision, mpi_max, neko_comm, ierr)
1068 end do
1069 end associate
1070
1071 epsi = 0.1_rp * epsi
1072 end do
1073
1074 ! Save the new designx
1075 this%xold2%x = this%xold1%x
1076 this%xold1%x = designx
1077 designx = x
1078
1079 !update the parameters of the MMA object nesessary to compute KKT residual
1080
1081 this%y%x = y
1082 this%z = z
1083 this%lambda%x = lambda
1084 this%mu%x = mu
1085 end subroutine mma_subsolve_dip_cpu
1086
1087end submodule mma_cpu
MMA module.
Definition mma.f90:69