Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1
33
34submodule(mma) mma_cpu
35 use lapack_interfaces, only: dgesv
36 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
37 use comm, only: neko_comm, pe_rank, mpi_real_precision
38 use math, only: neko_eps
39 use profiler, only: profiler_start_region, profiler_end_region
40
41 implicit none
42
43contains
44
45 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
46 ! ----------------------------------------------------- !
47 ! Update the design variable x by solving the convex !
48 ! approximation of the problem. !
49 ! !
50 ! This subroutine is called in each iteration of the !
51 ! optimization loop !
52 ! ----------------------------------------------------- !
53 class(mma_t), intent(inout) :: this
54 integer, intent(in) :: iter
55 real(kind=rp), dimension(this%n), intent(inout) :: x
56 real(kind=rp), dimension(this%n), intent(in) :: df0dx
57 real(kind=rp), dimension(this%m), intent(in) :: fval
58 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
59
60 if (.not. this%is_initialized) then
61 call neko_error("The MMA object is not initialized.")
62 end if
63
64 call profiler_start_region("MMA update")
65
66 ! generate a convex approximation of the problem
67 call profiler_start_region("MMA gensub")
68 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
69 call profiler_end_region("MMA gensub")
70
71 !solve the approximation problem using interior point method
72 call profiler_start_region("MMA subsolve")
73 if (this%subsolver .eq. "dip") then
74 call mma_subsolve_dip_cpu(this, x)
75 else
76 call mma_subsolve_dpip_cpu(this, x)
77 end if
78 call profiler_end_region("MMA subsolve")
79
80 call profiler_end_region("MMA update")
81
82 this%is_updated = .true.
83 end subroutine mma_update_cpu
84
86 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
87 ! ----------------------------------------------------- !
88 ! Compute the KKT condition right hand side for a given !
89 ! designx x and set the max and norm values of the !
90 ! residue of KKT system to this%residumax and !
91 ! this%residunorm. !
92 ! !
93 ! The left hand sides of the KKT conditions are computed!
94 ! for the following nonlinear programming problem: !
95 ! Minimize f_0(x) + a_0*z + !
96 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
97 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
98 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
99 ! z >= 0, y_i >= 0, i = 1,...,m !
100 ! !
101 ! !
102 ! Note that before calling this function, the function !
103 ! values (f0val, fval, dfdx, ...) should be updated !
104 ! using the new x values. !
105 ! ----------------------------------------------------- !
106 class(mma_t), intent(inout) :: this
107 real(kind=rp), dimension(this%n), intent(in) :: x
108 real(kind=rp), dimension(this%n), intent(in) :: df0dx
109 real(kind=rp), dimension(this%m), intent(in) :: fval
110 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
111
112 call profiler_start_region("MMA KKT computation")
113
114 if (this%subsolver .eq. "dip") then
115 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
116 else
117 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
118 end if
119
120 call profiler_end_region("MMA KKT computation")
121 end subroutine mma_kkt_cpu
122
124 ! point method (dpip) subsolve of MMA algorithm.
125 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
126 ! ----------------------------------------------------- !
127 ! Compute the KKT condition right hand side for a given !
128 ! designx x and set the max and norm values of the !
129 ! residue of KKT system to this%residumax and !
130 ! this%residunorm. !
131 ! !
132 ! The left hand sides of the KKT conditions are computed!
133 ! for the following nonlinear programming problem: !
134 ! Minimize f_0(x) + a_0*z + !
135 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
136 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
137 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
138 ! z >= 0, y_i >= 0, i = 1,...,m !
139 ! !
140 ! !
141 ! Note that before calling this function, the function !
142 ! values (f0val, fval, dfdx, ...) should be updated !
143 ! using the new x values. !
144 ! ----------------------------------------------------- !
145 class(mma_t), intent(inout) :: this
146 real(kind=rp), dimension(this%n), intent(in) :: x
147 real(kind=rp), dimension(this%n), intent(in) :: df0dx
148 real(kind=rp), dimension(this%m), intent(in) :: fval
149 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
150
151 real(kind=rp) :: rez, rezeta
152 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
153 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
154 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
155
156 real(kind=rp), dimension(4*this%m+2) :: residual_small
157 integer :: ierr
158 real(kind=rp) :: re_sq_norm
159
160 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
161 - this%xsi%x + this%eta%x
162 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
163 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
164
165 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
166 rexsi = this%xsi%x * (x - this%xmin%x)
167 reeta = this%eta%x * (this%xmax%x - x)
168 remu = this%mu%x * this%y%x
169 rezeta = this%zeta * this%z
170 res = this%lambda%x * this%s%x
171
172 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
173 residual_small = [rey, rez, relambda, remu, rezeta, res]
174
175 this%residumax = maxval(abs(residual))
176 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
177
178 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
179 mpi_real_precision, mpi_max, neko_comm, ierr)
180
181 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
182 mpi_real_precision, mpi_sum, neko_comm, ierr)
183
184 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
185 end subroutine mma_dpip_kkt_cpu
186
188 ! point method (dip) subsolve of MMA algorithm.
189 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
190 ! ----------------------------------------------------- !
191 ! Compute the KKT condition right hand side for a given !
192 ! designx x and set the max and norm values of the !
193 ! residue of KKT system to this%residumax and !
194 ! this%residunorm. !
195 ! !
196 ! The left hand sides of the KKT conditions are computed!
197 ! for the following nonlinear programming problem: !
198 ! Minimize f_0(x) + a_0*z + !
199 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
200 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
201 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
202 ! z >= 0, y_i >= 0, i = 1,...,m !
203 ! !
204 ! !
205 ! Note that before calling this function, the function !
206 ! values (f0val, fval, dfdx, ...) should be updated !
207 ! using the new x values. !
208 ! ----------------------------------------------------- !
209 class(mma_t), intent(inout) :: this
210 real(kind=rp), dimension(this%n), intent(in) :: x
211 real(kind=rp), dimension(this%n), intent(in) :: df0dx
212 real(kind=rp), dimension(this%m), intent(in) :: fval
213 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
214
215 real(kind=rp), dimension(this%m) :: relambda, remu
216 real(kind=rp), dimension(2*this%m) :: residual
217
218
219 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
220 ! Compute residual for mu (eta in the paper)
221 remu = this%lambda%x * this%mu%x
222
223 residual = abs([relambda, remu])
224 this%residumax = maxval(residual)
225 this%residunorm = norm2(residual)
226
227 end subroutine mma_dip_kkt_cpu
228
229 !============================================================================!
230 ! private internal subroutines
231
233 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
234 ! ----------------------------------------------------- !
235 ! Generate the approximation sub problem by computing !
236 ! the lower and upper asymtotes and the other necessary !
237 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
238 ! ----------------------------------------------------- !
239 class(mma_t), intent(inout) :: this
240 real(kind=rp), dimension(this%n), intent(in) :: x
241 real(kind=rp), dimension(this%n), intent(in) :: df0dx
242 real(kind=rp), dimension(this%m), intent(in) :: fval
243 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
244 integer, intent(in) :: iter
245 integer :: i, j, ierr
246 real(kind=rp), dimension(this%n) :: x_diff
247 real(kind=rp) :: asy_factor
248
249 x_diff = this%xmax%x - this%xmin%x
250
251 ! ------------------------------------------------------------------------ !
252 ! Setup the current asymptotes
253 associate(low => this%low%x, upp => this%upp%x, &
254 x_1 => this%xold1%x, x_2 => this%xold2%x)
255
256 if (iter .lt. 3) then
257 ! Initialize the lower and upper asymptotes
258 low = x - this%asyinit * x_diff
259 upp = x + this%asyinit * x_diff
260 else
261 do j = 1, this%n
262 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
263 asy_factor = this%asydecr
264 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
265 asy_factor = this%asyincr
266 else
267 asy_factor = 1.0_rp
268 end if
269
270 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
271 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
272 end do
273
274
275 ! Setting a minimum and maximum for the low and upp
276 ! asymptotes (eq3.9)
277 low = max(low, x - 10.0_rp * x_diff)
278 low = min(low, x - 0.01_rp * x_diff)
279
280 upp = min(upp, x + 10.0_rp * x_diff)
281 upp = max(upp, x + 0.01_rp * x_diff)
282 end if
283
284 end associate
285
286
287 ! ------------------------------------------------------------------------ !
288 ! Set the the bounds and coefficients for the approximation
289 ! the move bounds (alpha and beta) are slightly more restrictive
290 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
291 ! also check
292 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
293 ! eq (2.8) and (2.9)
294
295 associate(alpha => this%alpha%x, beta => this%beta%x, &
296 xmin => this%xmin%x, xmax => this%xmax%x, &
297 low => this%low%x, upp => this%upp%x, x => x)
298
299 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
300 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
301 end associate
302
303 ! ------------------------------------------------------------------------ !
304 ! Calculate p0j, q0j, pij, qij
305 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
306
307 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
308 pij => this%pij%x, qij => this%qij%x, &
309 low => this%low%x, upp => this%upp%x)
310
311 p0j = ( &
312 1.001_rp * max(df0dx, 0.0_rp) &
313 + 0.001_rp * max(-df0dx, 0.0_rp) &
314 + 0.00001_rp / max(x_diff, 0.00001_rp) &
315 ) * (upp - x)**2
316
317 q0j = ( &
318 0.001_rp * max(df0dx, 0.0_rp) &
319 + 1.001_rp * max(-df0dx, 0.0_rp) &
320 + 0.00001_rp / max(x_diff, 0.00001_rp)&
321 ) * (x - low)**2
322
323 do j = 1, this%n
324 do i = 1, this%m
325 pij(i, j) = ( &
326 1.001_rp * max(dfdx(i, j), 0.0_rp) &
327 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
328 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
329 ) * (upp(j) - x(j))**2
330
331 qij(i, j) = ( &
332 0.001_rp * max(dfdx(i, j), 0.0_rp) &
333 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
334 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
335 ) * (x(j) - low(j))**2
336 end do
337 end do
338
339 end associate
340
341 ! ------------------------------------------------------------------------ !
342 ! Computing bi as defined in page 5
343
344 associate(bi => this%bi%x, &
345 pij => this%pij%x, qij => this%qij%x, &
346 low => this%low%x, upp => this%upp%x)
347
348 bi = 0.0_rp
349 do i = 1, this%m
350 do j = 1, this%n
351 bi(i) = bi(i) &
352 + pij(i, j) / (upp(j) - x(j)) &
353 + qij(i, j) / (x(j) - low(j))
354 end do
355 end do
356
357 call mpi_allreduce(mpi_in_place, bi, this%m, &
358 mpi_real_precision, mpi_sum, neko_comm, ierr)
359 bi = bi - fval
360
361 end associate
362 end subroutine mma_gensub_cpu
363
366 subroutine mma_subsolve_dpip_cpu(this, designx)
367 ! ------------------------------------------------------- !
368 ! Dual-primal interior point method using Newton's step !
369 ! to solve MMA sub problem. !
370 ! A Backtracking Line Search approach is used to compute !
371 ! the step size; starting with the full Newton's step !
372 ! (delta = 1) and dividing by 2 until we have a step size !
373 ! that leads to a feasible point while ensuring a !
374 ! decrease in the residue. !
375 ! ------------------------------------------------------- !
376
377 class(mma_t), intent(inout) :: this
378 real(kind=rp), dimension(this%n), intent(inout) :: designx
379 ! Note that there is a local dummy "x" in this subroutine, thus, we call
380 ! the current design "designx" instead of just "x"
381 integer :: i, j, k, iter, itto, ierr
382 real(kind=rp) :: epsi, residual_max, residual_norm, &
383 z, zeta, rez, rezeta, &
384 delz, dz, dzeta, &
385 steg, zold, zetaold, new_residual
386 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
387 rey, relambda, remu, res, &
388 dely, dellambda, &
389 dy, dlambda, ds, dmu, &
390 yold, lambdaold, sold, muold
391 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
392 rex, rexsi, reeta, &
393 delx, diagx, dx, dxsi, deta, &
394 xold, xsiold, etaold
395 real(kind=rp), dimension(4*this%m + 2) :: residual_small
396 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
397 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
398
399 real(kind=rp), dimension(this%m, this%n) :: gg
400 real(kind=rp), dimension(this%m+1) :: bb
401 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
402 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
403
404 ! using DGESV in lapack to solve
405 ! the linear system which needs the following parameters
406 integer :: info
407 integer, dimension(this%m+1) :: ipiv
408
409 ! Parameters for global communication
410 real(kind=rp) :: re_sq_norm
411 real(kind=rp) :: minimal_epsilon
412
413 ! ------------------------------------------------------------------------ !
414 ! initial value for the parameters in the subsolve based on
415 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
416
417 epsi = 1.0_rp !100
418 x = 0.5_rp * (this%alpha%x + this%beta%x)
419 y = 1.0_rp
420 z = 1.0_rp
421 zeta = 1.0_rp
422 lambda = 1.0_rp
423 s = 1.0_rp
424 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
425 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
426 mu = max(1.0_rp, 0.5_rp * this%c%x)
427
428 ! ------------------------------------------------------------------------ !
429 ! Computing the minimal epsilon and choose the most conservative one
430
431 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
432 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
433 mpi_real_precision, mpi_min, neko_comm, ierr)
434
435 ! ------------------------------------------------------------------------ !
436 ! The main loop of the dual-primal interior point method.
437
438 do while (epsi .gt. minimal_epsilon)
439
440 ! --------------------------------------------------------------------- !
441 ! Calculating residuals based on
442 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
443 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
444
445 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
446 pij => this%pij%x, qij => this%qij%x, &
447 low => this%low%x, upp => this%upp%x, &
448 alpha => this%alpha%x, beta => this%beta%x, &
449 c => this%c%x, d => this%d%x, &
450 a0 => this%a0, a => this%a%x)
451
452 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
453 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
454 - xsi + eta
455
456 rey = c + d * y - lambda - mu
457 rez = a0 - zeta - dot_product(lambda, a)
458
459 relambda = 0.0_rp
460 do i = 1, this%m
461 do j = 1, this%n
462 ! Accumulate sums for relambda (the term gi(x))
463 relambda(i) = relambda(i) &
464 + pij(i, j) / (upp(j) - x(j)) &
465 + qij(i, j) / (x(j) - low(j))
466 end do
467 end do
468
469 end associate
470
471 ! --------------------------------------------------------------------- !
472 ! Computing the norm of the residuals
473
474 ! Complete the computations of lambda residuals
475 call mpi_allreduce(mpi_in_place, relambda, this%m, &
476 mpi_real_precision, mpi_sum, neko_comm, ierr)
477 relambda = relambda - this%a%x*z - y + s - this%bi%x
478
479 rexsi = xsi * (x - this%alpha%x) - epsi
480 reeta = eta * (this%beta%x - x) - epsi
481 remu = mu * y - epsi
482 rezeta = zeta * z - epsi
483 res = lambda * s - epsi
484
485 ! Setup vectors of residuals and their norms
486 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
487 residual_small = [rey, rez, relambda, remu, rezeta, res]
488
489 residual_max = maxval(abs(residual))
490 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
491
492 call mpi_allreduce(mpi_in_place, residual_max, 1, &
493 mpi_real_precision, mpi_max, neko_comm, ierr)
494
495 call mpi_allreduce(mpi_in_place, re_sq_norm, &
496 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
497
498 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
499
500 ! --------------------------------------------------------------------- !
501 ! Internal loop
502
503 do iter = 1, this%max_iter
504
505 !Check the condition
506 if (residual_max .lt. epsi) exit
507
508 delx = 0.0_rp
509 do j = 1, this%n
510 do i = 1, this%m
511 delx(j) = delx(j) &
512 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
513 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
514 end do
515 end do
516
517 delx = delx &
518 + this%p0j%x / (this%upp%x - x)**2 &
519 - this%q0j%x / (x - this%low%x)**2 &
520 - epsi / (x - this%alpha%x) &
521 + epsi / (this%beta%x - x)
522
523 dely = this%c%x + this%d%x * y - lambda - epsi / y
524 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
525
526 ! Accumulate sums for dellambda (the term gi(x))
527 dellambda = 0.0_rp
528 do i = 1, this%m
529 do j = 1, this%n
530 dellambda(i) = dellambda(i) &
531 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
532 + this%qij%x(i, j) / (x(j) - this%low%x(j))
533 end do
534 end do
535
536 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
537 mpi_real_precision, mpi_sum, neko_comm, ierr)
538
539 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
540
541 do i = 1, this%m
542 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
543 - this%qij%x(i,:) / (x - this%low%x)**2
544 end do
545
546 diagx = &
547 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
548 / (this%upp%x - x)**3 &
549 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
550 / (x - this%low%x)**3
551
552 diagx = 2.0_rp * diagx &
553 + xsi / (x - this%alpha%x) &
554 + eta / (this%beta%x - x)
555
556
557 !Here we only consider the case m<n in the matlab code
558 !assembling the right hand side matrix based on eq(5.20)
559 ! bb = [dellambda + dely/(this%d%x + &
560 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
561
562 !--------------------------------------------------------------------!
563 ! for MPI computation of bb
564
565 bb = 0.0_rp
566 do i = 1, this%m
567 do j = 1, this%n
568 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
569 end do
570 end do
571
572 call mpi_allreduce(mpi_in_place, bb, this%m, &
573 mpi_real_precision, mpi_sum, neko_comm, ierr)
574
575 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
576 bb(this%m + 1) = delz
577
578 !--------------------------------------------------------------------!
579 ! assembling the coefficients matrix AA based on eq(5.20)
580 ! AA(1:this%m,1:this%m) = &
581 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
582 ! !update diag(AA)
583 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
584 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
585
586 aa = 0.0_rp
587 ! Direct computation of the matrix multiplication
588 ! (for better performance)
589 do i = 1, this%m
590 do j = 1, this%m
591 ! Compute the (i, j) element of AA
592 do k = 1, this%n !this n is global
593 aa(i, j) = aa(i, j) &
594 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
595 end do
596 end do
597 end do
598
599 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
600
601 call mpi_allreduce(mpi_in_place, aa_buffer, &
602 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
603
604 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
605
606 do i = 1, this%m
607 ! update the diag AA
608 aa(i, i) = aa(i, i) &
609 + s(i) / lambda(i) &
610 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
611 end do
612
613 aa(1:this%m, this%m+1) = this%a%x
614 aa(this%m+1, 1:this%m) = this%a%x
615 aa(this%m+1, this%m+1) = - zeta/z
616
617 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
618
619 if (info .ne. 0) then
620 call neko_error("DGESV failed to solve the linear system in " // &
621 "mma_subsolve_dpip.")
622 end if
623
624
625 dlambda = bb(1:this%m)
626 dz = bb(this%m + 1)
627
628 ! based on eq(5.19)
629 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
630 dy = (-dely + dlambda) / (this%d%x + mu / y)
631
632 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
633 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
634 dmu = -mu + (epsi - mu * dy) / y
635 dzeta = -zeta + (epsi - zeta * dz) / z
636 ds = -s + (epsi - dlambda * s) / lambda
637
638 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
639 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
640
641 steg = 1.0_rp / maxval([ &
642 1.0_rp, &
643 -1.01_rp * dxx / xx, &
644 -1.01_rp * dx / (x - this%alpha%x), &
645 1.01_rp * dx / (this%beta%x - x) &
646 ])
647
648 ! Save the old values
649 xold = x
650 yold = y
651 zold = z
652 lambdaold = lambda
653 xsiold = xsi
654 etaold = eta
655 muold = mu
656 zetaold = zeta
657 sold = s
658
659 new_residual = 2.0_rp * residual_norm
660
661 ! Share the new_residual and steg values
662 call mpi_allreduce(mpi_in_place, steg, 1, &
663 mpi_real_precision, mpi_min, neko_comm, ierr)
664 call mpi_allreduce(mpi_in_place, new_residual, 1, &
665 mpi_real_precision, mpi_min, neko_comm, ierr)
666
667 ! The innermost loop to determine the suitable step length
668 ! using the Backtracking Line Search approach
669 itto = 0
670 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
671 itto = itto + 1
672
673 ! update the variables
674 x = xold + steg*dx
675 y = yold + steg*dy
676 z = zold + steg*dz
677
678 lambda = lambdaold + steg*dlambda
679 xsi = xsiold + steg*dxsi
680 eta = etaold + steg*deta
681 mu = muold + steg*dmu
682 zeta = zetaold + steg*dzeta
683 s = sold + steg*ds
684
685 ! Recompute the new_residual to see if this stepsize improves
686 ! the residue
687 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
688 / (this%upp%x - x)**2 &
689 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
690 / (x - this%low%x)**2 &
691 - xsi + eta
692
693 rey = this%c%x + this%d%x*y - lambda - mu
694 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
695
696 ! Accumulate sums for relambda (the term gi(x))
697 relambda = 0.0_rp
698 do i = 1, this%m
699 do j = 1, this%n
700 relambda(i) = relambda(i) &
701 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
702 + this%qij%x(i, j) / (x(j) - this%low%x(j))
703 end do
704 end do
705
706 call mpi_allreduce(mpi_in_place, relambda, this%m, &
707 mpi_real_precision, mpi_sum, neko_comm, ierr)
708
709 relambda = relambda - this%a%x*z - y + s - this%bi%x
710
711 rexsi = xsi * (x - this%alpha%x) - epsi
712 reeta = eta * (this%beta%x - x) - epsi
713 remu = mu * y - epsi
714 rezeta = zeta * z - epsi
715 res = lambda * s - epsi
716
717 ! Compute squared norms for the residuals
718 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
719 call mpi_allreduce(mpi_in_place, re_sq_norm, &
720 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
721
722 residual_small = [rey, rez, relambda, remu, rezeta, res]
723 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
724
725 steg = steg / 2.0_rp
726 end do
727 steg = 2.0_rp * steg ! Correction for the final division by 2
728
729 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
730
731 ! Update the maximum and norm of the residuals
732 residual_norm = new_residual
733 residual_max = maxval(abs(residual))
734 call mpi_allreduce(mpi_in_place, residual_max, 1, &
735 mpi_real_precision, mpi_max, neko_comm, ierr)
736 end do
737
738 epsi = 0.1_rp * epsi
739 end do
740
741 ! Save the new designx
742 this%xold2%x = this%xold1%x
743 this%xold1%x = designx
744 designx = x
745
746 !update the parameters of the MMA object nesessary to compute KKT residual
747 this%y%x = y
748 this%z = z
749 this%lambda%x = lambda
750 this%zeta = zeta
751 this%xsi%x = xsi
752 this%eta%x = eta
753 this%mu%x = mu
754 this%s%x = s
755
756 end subroutine mma_subsolve_dpip_cpu
757
760 subroutine mma_subsolve_dip_cpu(this, designx)
761 ! ------------------------------------------------------------------------ !
762 ! -------------------------------Dual Solver------------------------------ !
763 ! ------------------------------------------------------------------------ !
764 ! This implementation is based on: !
765 ! https://doi.org/10.1007/s00158-012-0869-2 !
766 ! Definition of the Lagrangian function: !
767 ! !
768 ! L(x, y, z, λ) = !
769 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
770 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
771 ! - sum_{i=1}^{m} λ_i * b_i !
772 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ] !
773 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z !
774 ! !
775 ! Breakdown of terms: !
776 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
777 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
778 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
779 ! !
780 ! Optimization problem if λ is given: !
781 ! !
782 ! Minimize L(x, y, z, λ) !
783 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
784 ! !
785 ! Since the problem is separable: !
786 ! Ψ(λ) = !
787 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
788 ! + min_z {L_z(z, λ) | z ≥ 0} !
789 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
790 ! !
791 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
792 ! !
793 ! ------------------------------------------------------------------------ !
794
795 class(mma_t), intent(inout) :: this
796 real(kind=rp), dimension(this%n), intent(inout) :: designx
797 ! Note that there is a local dummy "x" in this subroutine, thus, we call
798 ! the current design "designx" instead of just "x"
799 integer :: i, j, k, iter, ierr
800 real(kind=rp) :: epsi, residual_max, z, steg
801 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
802 relambda, remu, dlambda, dmu, gradlambda
803 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
804
805 ! To compute the Hessian based on eq(13)
806 ! https://doi.org/10.1007/s00158-012-0869-2
807
808 ! inverse of a diag matrix:
809 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
810 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
811 real(kind=rp), dimension(this%m,this%m) :: hess
812 real(kind=rp) :: hesstrace
813
814
815 ! using DGESV in lapack to solve
816 ! the linear system which needs the following parameters
817 integer :: info
818 integer, dimension(this%m+1) :: ipiv
819
820 ! Parameters for global communication
821 real(kind=rp) :: minimal_epsilon
822
823 ! ------------------------------------------------------------------------ !
824 ! initial value for the parameters in the subsolve based on
825 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
826
827 epsi = 1.0_rp !100
828 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
829 y = 1.0_rp
830 z = 0.0_rp
831 lambda = max(1.0_rp, 0.5_rp * this%c%x)
832 mu = 1.0_rp !this parameter is eta in Niel's paper
833 ! note that mu in the paper translates to epsi in the code following the
834 ! same style as the Cpp code by Neils
835
836 ! ------------------------------------------------------------------------ !
837 ! Computing the minimal epsilon and choose the most conservative one
838
839 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
840 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
841 mpi_real_precision, mpi_min, neko_comm, ierr)
842
843 ! ------------------------------------------------------------------------ !
844 ! The main loop of the dual-primal interior point method.
845
846 do while (epsi .gt. minimal_epsilon)
847
848 ! --------------------------------------------------------------------- !
849 ! Calculating residuals based on
850 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
851 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
852
853 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
854 pij => this%pij%x, qij => this%qij%x, &
855 low => this%low%x, upp => this%upp%x, &
856 alpha => this%alpha%x, beta => this%beta%x, &
857 c => this%c%x, d => this%d%x, &
858 a0 => this%a0, a => this%a%x, &
859 bi => this%bi%x)
860 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
861 ! the initial value of λ
862
863 ! Comput the value of y that minimizes L_y for the current λ
864 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
865 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
866 do i=1, this%m
867 if (abs(d(i)) < neko_eps) then
868 ! to avoid devision by zero in case d=0
869 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
870 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
871 else
872 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
873 end if
874 end do
875
876 ! Comput the value of z that minimizes L_z for the current λ
877 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
878 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
879 ! ensure z>=0
880 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
881
882 ! Comput the value of x that minimizes L_x for the current λ
883 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
884 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
885 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
886 pjlambda = (p0j + matmul(transpose(pij), lambda))
887 qjlambda = (q0j + matmul(transpose(qij), lambda))
888 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
889 (sqrt(pjlambda) + sqrt(qjlambda))
890
891 ! Ensure that x is feasible (alpha<=x<=beta)
892 x = merge(alpha, x, x .lt. alpha)
893 x = merge(beta, x, x .gt. beta)
894
895 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
896 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
897 matmul(qij, 1.0_rp / (x - low))
898
899 ! Global comminucation for relambda values
900 call mpi_allreduce(mpi_in_place, relambda, this%m, &
901 mpi_real_precision, mpi_sum, neko_comm, ierr)
902 relambda = relambda - bi - y - a * z + mu
903
904 ! Compute residual for mu (eta in the paper)
905 remu = mu * lambda - epsi
906
907 residual_max = maxval(abs([relambda, remu]))
908
909 ! ------------------------------------------------------------------- !
910 ! Internal loop
911 do iter = 1, this%max_iter
912
913 !Check the condition
914 if (residual_max .lt. epsi) exit
915
916 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
917
918 gradlambda = matmul(pij, 1.0_rp / (upp - x)) + &
919 matmul(qij, 1.0_rp/(x - low))
920
921 ! Global comminucation for gradlambda values
922 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
923 mpi_real_precision, mpi_sum, neko_comm, ierr)
924 gradlambda = gradlambda - bi - y - a * z
925
926 ! Update gradlambda as the right hand side for Newton's method(eq10)
927 gradlambda = - gradlambda - epsi / lambda
928
929 ! Computing the Hessian as in equation (13) in
930 !! https://doi.org/10.1007/s00158-012-0869-2
931
932 !--------------contributions of x terms to Hess--------------------!
933 ljjxinv= - 1.0_rp / ( (2.0_rp * pjlambda/(upp - x)**3) + &
934 (2.0_rp * qjlambda/(x - low)**3))
935
936
937 ! Remove the sensitivity for the active primal constraints
938 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < neko_eps)
939 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < neko_eps)
940
941 do i = 1, this%m
942 hijx(i,:) = pij(i,:) / (upp - x)**2 &
943 - qij(i,:) / (x - low)**2
944 end do
945
946 hess = 0.0_rp
947 ! Direct computation of the matrix multiplication
948 ! (for better performance)
949 do i = 1, this%m
950 do j = 1, this%m
951 ! Compute the (i, j) element of AA
952 do k = 1, this%n !this n is global
953 hess(i, j) = hess(i, j) &
954 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
955 end do
956 end do
957 end do
958
959 call mpi_allreduce(mpi_in_place, hess, &
960 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
961
962 !---------------contributions of z terms to Hess-------------------!
963 ! There is no contibution to the Hess from z terms as z terms are
964 ! linear w.r.t λ
965
966 !---------------contributions of y terms to Hess-------------------!
967 ! Only for inactive constraint, we consider contributions to Hess.
968 ! Note that if d(i) = 0, the y terms (just like z terms) will not
969 ! contribute to the Hessian matrix.
970 do i = 1, this%m
971 if (y(i) .gt. 0.0_rp) then
972 if (abs(d(i)) < neko_eps) then
973 ! Hess(i, i) = Hess(i, i) - 1.0_rp/1.0e-8_rp
974 else
975 hess(i, i) = hess(i, i) - 1.0_rp/d(i)
976 end if
977 end if
978 ! Based on eq(10), note the term (-\Omega \Lambda)
979 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
980 end do
981
982 ! Improve the robustness by stablizing the Hess using
983 ! Levenberg-Marquardt algorithm (heuristically)
984 hesstrace = 0.0_rp
985 do i=1, this%m
986 hesstrace = hesstrace + hess(i, i)
987 end do
988 do i=1, this%m
989 hess(i,i) = hess(i, i) - &
990 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
991 end do
992
993 call dgesv(this%m , 1, hess, this%m , ipiv, &
994 gradlambda, this%m, info)
995
996 if (info .ne. 0) then
997 call neko_error("DGESV failed to solve the linear system in " // &
998 "mma_subsolve_dip.")
999 end if
1000 dlambda = gradlambda
1001
1002 ! based on eq(11) for delta eta
1003 dmu = -mu + epsi / lambda - dlambda * mu / lambda
1004
1005 ! Compute the stepsize and update lambda and mu (eta in the paper)
1006
1007 steg = 1.005_rp
1008 do i = 1, this%m
1009 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1010 steg < -1.01_rp * dlambda(i) / lambda(i))
1011 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1012 steg, steg < -1.01_rp * dmu(i) / mu(i))
1013 end do
1014
1015 steg = 1.0_rp / steg
1016
1017 lambda = lambda + steg*dlambda
1018 mu = mu + steg*dmu
1019
1020 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1021 ! the updated values of λ
1022
1023 ! Comput the value of y that minimizes L_y for the current λ
1024 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
1025 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
1026 do i=1, this%m
1027 if (abs(d(i)) < neko_eps) then
1028 ! to avoid devision by zero in case d=0
1029 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
1030 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
1031 else
1032 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
1033 end if
1034 end do
1035
1036 ! Comput the value of z that minimizes L_z for the current λ
1037 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
1038 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
1039 ! ensure z>=0
1040 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
1041
1042 ! Comput the value of x that minimizes L_x for the current λ
1043 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1044 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1045 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1046 pjlambda = (p0j + matmul(transpose(pij), lambda))
1047 qjlambda = (q0j + matmul(transpose(qij), lambda))
1048 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1049 (sqrt(pjlambda) + sqrt(qjlambda))
1050
1051 ! Ensure that x is feasible (alpha<=x<=beta)
1052 x = merge(alpha, x, x .lt. alpha)
1053 x = merge(beta, x, x .gt. beta)
1054
1055 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1056 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
1057 matmul(qij, 1.0_rp / (x - low))
1058 ! Global comminucation for relambda values
1059 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1060 mpi_real_precision, mpi_sum, neko_comm, ierr)
1061 relambda = relambda - bi - y - a * z + mu
1062
1063 ! Compute residual for mu (eta in the paper)
1064 remu = mu * lambda - epsi
1065
1066 residual_max = maxval(abs([relambda, remu]))
1067 end do
1068 end associate
1069
1070 epsi = 0.1_rp * epsi
1071 end do
1072
1073 ! Save the new designx
1074 this%xold2%x = this%xold1%x
1075 this%xold1%x = designx
1076 designx = x
1077
1078 !update the parameters of the MMA object nesessary to compute KKT residual
1079
1080 this%y%x = y
1081 this%z = z
1082 this%lambda%x = lambda
1083 this%mu%x = mu
1084 end subroutine mma_subsolve_dip_cpu
1085
1086end submodule mma_cpu
MMA module.
Definition mma.f90:69