Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1! Copyright (c) 2025, The Neko-TOP Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32
33submodule(mma) mma_cpu
34 use lapack_interfaces, only: dgesv
35 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
36 use comm, only: neko_comm, pe_rank, mpi_real_precision
37 use math, only: neko_eps
38 use profiler, only: profiler_start_region, profiler_end_region
39
40 implicit none
41
42contains
43
44 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
45 ! ----------------------------------------------------- !
46 ! Update the design variable x by solving the convex !
47 ! approximation of the problem. !
48 ! !
49 ! This subroutine is called in each iteration of the !
50 ! optimization loop !
51 ! ----------------------------------------------------- !
52 class(mma_t), intent(inout) :: this
53 integer, intent(in) :: iter
54 real(kind=rp), dimension(this%n), intent(inout) :: x
55 real(kind=rp), dimension(this%n), intent(in) :: df0dx
56 real(kind=rp), dimension(this%m), intent(in) :: fval
57 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
58
59 if (.not. this%is_initialized) then
60 call neko_error("The MMA object is not initialized.")
61 end if
62
63 call profiler_start_region("MMA gensub")
64 ! generate a convex approximation of the problem
65 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
66 call profiler_end_region("MMA gensub")
67
68 !solve the approximation problem using interior point method
69 call profiler_start_region("MMA subsolve")
70 if (this%subsolver .eq. "dip") then
71 call mma_subsolve_dip_cpu(this, x)
72 else
73 call mma_subsolve_dpip_cpu(this, x)
74 end if
75 call profiler_end_region("MMA subsolve")
76
77 this%is_updated = .true.
78 end subroutine mma_update_cpu
79
81 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
82 ! ----------------------------------------------------- !
83 ! Compute the KKT condition right hand side for a given !
84 ! designx x and set the max and norm values of the !
85 ! residue of KKT system to this%residumax and !
86 ! this%residunorm. !
87 ! !
88 ! The left hand sides of the KKT conditions are computed!
89 ! for the following nonlinear programming problem: !
90 ! Minimize f_0(x) + a_0*z + !
91 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
92 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
93 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
94 ! z >= 0, y_i >= 0, i = 1,...,m !
95 ! !
96 ! !
97 ! Note that before calling this function, the function !
98 ! values (f0val, fval, dfdx, ...) should be updated !
99 ! using the new x values. !
100 ! ----------------------------------------------------- !
101 class(mma_t), intent(inout) :: this
102 real(kind=rp), dimension(this%n), intent(in) :: x
103 real(kind=rp), dimension(this%n), intent(in) :: df0dx
104 real(kind=rp), dimension(this%m), intent(in) :: fval
105 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
106
107 if (this%subsolver .eq. "dip") then
108 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
109 else
110 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
111 end if
112 end subroutine mma_kkt_cpu
113
115 ! point method (dpip) subsolve of MMA algorithm.
116 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
117 ! ----------------------------------------------------- !
118 ! Compute the KKT condition right hand side for a given !
119 ! designx x and set the max and norm values of the !
120 ! residue of KKT system to this%residumax and !
121 ! this%residunorm. !
122 ! !
123 ! The left hand sides of the KKT conditions are computed!
124 ! for the following nonlinear programming problem: !
125 ! Minimize f_0(x) + a_0*z + !
126 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
127 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
128 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
129 ! z >= 0, y_i >= 0, i = 1,...,m !
130 ! !
131 ! !
132 ! Note that before calling this function, the function !
133 ! values (f0val, fval, dfdx, ...) should be updated !
134 ! using the new x values. !
135 ! ----------------------------------------------------- !
136 class(mma_t), intent(inout) :: this
137 real(kind=rp), dimension(this%n), intent(in) :: x
138 real(kind=rp), dimension(this%n), intent(in) :: df0dx
139 real(kind=rp), dimension(this%m), intent(in) :: fval
140 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
141
142 real(kind=rp) :: rez, rezeta
143 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
144 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
145 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
146
147 real(kind=rp), dimension(4*this%m+2) :: residual_small
148 integer :: ierr
149 real(kind=rp) :: re_sq_norm
150
151 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
152 - this%xsi%x + this%eta%x
153 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
154 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
155
156 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
157 rexsi = this%xsi%x * (x - this%xmin%x)
158 reeta = this%eta%x * (this%xmax%x - x)
159 remu = this%mu%x * this%y%x
160 rezeta = this%zeta * this%z
161 res = this%lambda%x * this%s%x
162
163 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
164 residual_small = [rey, rez, relambda, remu, rezeta, res]
165
166 this%residumax = maxval(abs(residual))
167 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
168
169 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
170 mpi_real_precision, mpi_max, neko_comm, ierr)
171
172 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
173 mpi_real_precision, mpi_sum, neko_comm, ierr)
174
175 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
176 end subroutine mma_dpip_kkt_cpu
177
179 ! point method (dip) subsolve of MMA algorithm.
180 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
181 ! ----------------------------------------------------- !
182 ! Compute the KKT condition right hand side for a given !
183 ! designx x and set the max and norm values of the !
184 ! residue of KKT system to this%residumax and !
185 ! this%residunorm. !
186 ! !
187 ! The left hand sides of the KKT conditions are computed!
188 ! for the following nonlinear programming problem: !
189 ! Minimize f_0(x) + a_0*z + !
190 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
191 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
192 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
193 ! z >= 0, y_i >= 0, i = 1,...,m !
194 ! !
195 ! !
196 ! Note that before calling this function, the function !
197 ! values (f0val, fval, dfdx, ...) should be updated !
198 ! using the new x values. !
199 ! ----------------------------------------------------- !
200 class(mma_t), intent(inout) :: this
201 real(kind=rp), dimension(this%n), intent(in) :: x
202 real(kind=rp), dimension(this%n), intent(in) :: df0dx
203 real(kind=rp), dimension(this%m), intent(in) :: fval
204 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
205
206 real(kind=rp), dimension(this%m) :: relambda, remu
207 real(kind=rp), dimension(2*this%m) :: residual
208
209
210 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
211 ! Compute residual for mu (eta in the paper)
212 remu = this%lambda%x * this%mu%x
213
214 residual = abs([relambda, remu])
215 this%residumax = maxval(residual)
216 this%residunorm = norm2(residual)
217
218 end subroutine mma_dip_kkt_cpu
219
220 !============================================================================!
221 ! private internal subroutines
222
224 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
225 ! ----------------------------------------------------- !
226 ! Generate the approximation sub problem by computing !
227 ! the lower and upper asymtotes and the other necessary !
228 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
229 ! ----------------------------------------------------- !
230 class(mma_t), intent(inout) :: this
231 real(kind=rp), dimension(this%n), intent(in) :: x
232 real(kind=rp), dimension(this%n), intent(in) :: df0dx
233 real(kind=rp), dimension(this%m), intent(in) :: fval
234 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
235 integer, intent(in) :: iter
236 integer :: i, j, ierr
237 real(kind=rp), dimension(this%n) :: x_diff
238 real(kind=rp) :: asy_factor
239
240 x_diff = this%xmax%x - this%xmin%x
241
242 ! ------------------------------------------------------------------------ !
243 ! Setup the current asymptotes
244 associate(low => this%low%x, upp => this%upp%x, &
245 x_1 => this%xold1%x, x_2 => this%xold2%x)
246
247 if (iter .lt. 3) then
248 ! Initialize the lower and upper asymptotes
249 low = x - this%asyinit * x_diff
250 upp = x + this%asyinit * x_diff
251 else
252 do j = 1, this%n
253 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
254 asy_factor = this%asydecr
255 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
256 asy_factor = this%asyincr
257 else
258 asy_factor = 1.0_rp
259 end if
260
261 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
262 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
263 end do
264
265
266 ! Setting a minimum and maximum for the low and upp
267 ! asymptotes (eq3.9)
268 low = max(low, x - 10.0_rp * x_diff)
269 low = min(low, x - 0.01_rp * x_diff)
270
271 upp = min(upp, x + 10.0_rp * x_diff)
272 upp = max(upp, x + 0.01_rp * x_diff)
273 end if
274
275 end associate
276
277
278 ! ------------------------------------------------------------------------ !
279 ! Set the the bounds and coefficients for the approximation
280 ! the move bounds (alpha and beta) are slightly more restrictive
281 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
282 ! also check
283 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
284 ! eq (2.8) and (2.9)
285
286 associate(alpha => this%alpha%x, beta => this%beta%x, &
287 xmin => this%xmin%x, xmax => this%xmax%x, &
288 low => this%low%x, upp => this%upp%x, x => x)
289
290 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
291 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
292 end associate
293
294 ! ------------------------------------------------------------------------ !
295 ! Calculate p0j, q0j, pij, qij
296 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
297
298 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
299 pij => this%pij%x, qij => this%qij%x, &
300 low => this%low%x, upp => this%upp%x)
301
302 p0j = ( &
303 1.001_rp * max(df0dx, 0.0_rp) &
304 + 0.001_rp * max(-df0dx, 0.0_rp) &
305 + 0.00001_rp / max(x_diff, 0.00001_rp) &
306 ) * (upp - x)**2
307
308 q0j = ( &
309 0.001_rp * max(df0dx, 0.0_rp) &
310 + 1.001_rp * max(-df0dx, 0.0_rp) &
311 + 0.00001_rp / max(x_diff, 0.00001_rp)&
312 ) * (x - low)**2
313
314 do j = 1, this%n
315 do i = 1, this%m
316 pij(i, j) = ( &
317 1.001_rp * max(dfdx(i, j), 0.0_rp) &
318 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
319 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
320 ) * (upp(j) - x(j))**2
321
322 qij(i, j) = ( &
323 0.001_rp * max(dfdx(i, j), 0.0_rp) &
324 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
325 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
326 ) * (x(j) - low(j))**2
327 end do
328 end do
329
330 end associate
331
332 ! ------------------------------------------------------------------------ !
333 ! Computing bi as defined in page 5
334
335 associate(bi => this%bi%x, &
336 pij => this%pij%x, qij => this%qij%x, &
337 low => this%low%x, upp => this%upp%x)
338
339 bi = 0.0_rp
340 do i = 1, this%m
341 do j = 1, this%n
342 bi(i) = bi(i) &
343 + pij(i, j) / (upp(j) - x(j)) &
344 + qij(i, j) / (x(j) - low(j))
345 end do
346 end do
347
348 call mpi_allreduce(mpi_in_place, bi, this%m, &
349 mpi_real_precision, mpi_sum, neko_comm, ierr)
350 bi = bi - fval
351
352 end associate
353 end subroutine mma_gensub_cpu
354
357 subroutine mma_subsolve_dpip_cpu(this, designx)
358 ! ------------------------------------------------------- !
359 ! Dual-primal interior point method using Newton's step !
360 ! to solve MMA sub problem. !
361 ! A Backtracking Line Search approach is used to compute !
362 ! the step size; starting with the full Newton's step !
363 ! (delta = 1) and dividing by 2 until we have a step size !
364 ! that leads to a feasible point while ensuring a !
365 ! decrease in the residue. !
366 ! ------------------------------------------------------- !
367
368 class(mma_t), intent(inout) :: this
369 real(kind=rp), dimension(this%n), intent(inout) :: designx
370 ! Note that there is a local dummy "x" in this subroutine, thus, we call
371 ! the current design "designx" instead of just "x"
372 integer :: i, j, k, iter, itto, ierr
373 real(kind=rp) :: epsi, residual_max, residual_norm, &
374 z, zeta, rez, rezeta, &
375 delz, dz, dzeta, &
376 steg, zold, zetaold, new_residual
377 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
378 rey, relambda, remu, res, &
379 dely, dellambda, &
380 dy, dlambda, ds, dmu, &
381 yold, lambdaold, sold, muold
382 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
383 rex, rexsi, reeta, &
384 delx, diagx, dx, dxsi, deta, &
385 xold, xsiold, etaold
386 real(kind=rp), dimension(4*this%m + 2) :: residual_small
387 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
388 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
389
390 real(kind=rp), dimension(this%m, this%n) :: gg
391 real(kind=rp), dimension(this%m+1) :: bb
392 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
393 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
394
395 ! using DGESV in lapack to solve
396 ! the linear system which needs the following parameters
397 integer :: info
398 integer, dimension(this%m+1) :: ipiv
399
400 ! Parameters for global communication
401 real(kind=rp) :: re_sq_norm
402 real(kind=rp) :: minimal_epsilon
403
404 integer :: nglobal
405
406 ! ------------------------------------------------------------------------ !
407 ! initial value for the parameters in the subsolve based on
408 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
409
410 epsi = 1.0_rp !100
411 x = 0.5_rp * (this%alpha%x + this%beta%x)
412 y = 1.0_rp
413 z = 1.0_rp
414 zeta = 1.0_rp
415 lambda = 1.0_rp
416 s = 1.0_rp
417 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
418 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
419 mu = max(1.0_rp, 0.5_rp * this%c%x)
420
421 call mpi_allreduce(this%n, nglobal, 1, &
422 mpi_integer, mpi_sum, neko_comm, ierr)
423
424 ! ------------------------------------------------------------------------ !
425 ! Computing the minimal epsilon and choose the most conservative one
426
427 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
428 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
429 mpi_real_precision, mpi_min, neko_comm, ierr)
430
431 ! ------------------------------------------------------------------------ !
432 ! The main loop of the dual-primal interior point method.
433
434 do while (epsi .gt. minimal_epsilon)
435
436 ! --------------------------------------------------------------------- !
437 ! Calculating residuals based on
438 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
439 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
440
441 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
442 pij => this%pij%x, qij => this%qij%x, &
443 low => this%low%x, upp => this%upp%x, &
444 alpha => this%alpha%x, beta => this%beta%x, &
445 c => this%c%x, d => this%d%x, &
446 a0 => this%a0, a => this%a%x, &
447 bi => this%bi%x)
448
449 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
450 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
451 - xsi + eta
452
453 rey = c + d * y - lambda - mu
454 rez = a0 - zeta - dot_product(lambda, a)
455
456 relambda = 0.0_rp
457 do i = 1, this%m
458 do j = 1, this%n
459 ! Accumulate sums for relambda (the term gi(x))
460 relambda(i) = relambda(i) &
461 + pij(i, j) / (upp(j) - x(j)) &
462 + qij(i, j) / (x(j) - low(j))
463 end do
464 end do
465
466 end associate
467
468 ! --------------------------------------------------------------------- !
469 ! Computing the norm of the residuals
470
471 ! Complete the computations of lambda residuals
472 call mpi_allreduce(mpi_in_place, relambda, this%m, &
473 mpi_real_precision, mpi_sum, neko_comm, ierr)
474 relambda = relambda - this%a%x*z - y + s - this%bi%x
475
476 rexsi = xsi * (x - this%alpha%x) - epsi
477 reeta = eta * (this%beta%x - x) - epsi
478 remu = mu * y - epsi
479 rezeta = zeta * z - epsi
480 res = lambda * s - epsi
481
482 ! Setup vectors of residuals and their norms
483 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
484 residual_small = [rey, rez, relambda, remu, rezeta, res]
485
486 residual_max = maxval(abs(residual))
487 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
488
489 call mpi_allreduce(mpi_in_place, residual_max, 1, &
490 mpi_real_precision, mpi_max, neko_comm, ierr)
491
492 call mpi_allreduce(mpi_in_place, re_sq_norm, &
493 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
494
495 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
496
497 ! --------------------------------------------------------------------- !
498 ! Internal loop
499
500 do iter = 1, this%max_iter
501
502 !Check the condition
503 if (residual_max .lt. epsi) exit
504
505 delx = 0.0_rp
506 do j = 1, this%n
507 do i = 1, this%m
508 delx(j) = delx(j) &
509 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
510 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
511 end do
512 end do
513
514 delx = delx &
515 + this%p0j%x / (this%upp%x - x)**2 &
516 - this%q0j%x / (x - this%low%x)**2 &
517 - epsi / (x - this%alpha%x) &
518 + epsi / (this%beta%x - x)
519
520 dely = this%c%x + this%d%x * y - lambda - epsi / y
521 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
522
523 ! Accumulate sums for dellambda (the term gi(x))
524 dellambda = 0.0_rp
525 do i = 1, this%m
526 do j = 1, this%n
527 dellambda(i) = dellambda(i) &
528 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
529 + this%qij%x(i, j) / (x(j) - this%low%x(j))
530 end do
531 end do
532
533 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
534 mpi_real_precision, mpi_sum, neko_comm, ierr)
535
536 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
537
538 do i = 1, this%m
539 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
540 - this%qij%x(i,:) / (x - this%low%x)**2
541 end do
542
543 diagx = &
544 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
545 / (this%upp%x - x)**3 &
546 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
547 / (x - this%low%x)**3
548
549 diagx = 2.0_rp * diagx &
550 + xsi / (x - this%alpha%x) &
551 + eta / (this%beta%x - x)
552
553
554 !Here we only consider the case m<n in the matlab code
555 !assembling the right hand side matrix based on eq(5.20)
556 ! bb = [dellambda + dely/(this%d%x + &
557 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
558
559 !--------------------------------------------------------------------!
560 ! for MPI computation of bb
561
562 bb = 0.0_rp
563 do i = 1, this%m
564 do j = 1, this%n
565 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
566 end do
567 end do
568
569 call mpi_allreduce(mpi_in_place, bb, this%m, &
570 mpi_real_precision, mpi_sum, neko_comm, ierr)
571
572 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
573 bb(this%m + 1) = delz
574
575 !--------------------------------------------------------------------!
576 ! assembling the coefficients matrix AA based on eq(5.20)
577 ! AA(1:this%m,1:this%m) = &
578 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
579 ! !update diag(AA)
580 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
581 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
582
583 aa = 0.0_rp
584 ! Direct computation of the matrix multiplication
585 ! (for better performance)
586 do i = 1, this%m
587 do j = 1, this%m
588 ! Compute the (i, j) element of AA
589 do k = 1, this%n !this n is global
590 aa(i, j) = aa(i, j) &
591 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
592 end do
593 end do
594 end do
595
596 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
597
598 call mpi_allreduce(mpi_in_place, aa_buffer, &
599 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
600
601 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
602
603 do i = 1, this%m
604 ! update the diag AA
605 aa(i, i) = aa(i, i) &
606 + s(i) / lambda(i) &
607 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
608 end do
609
610 aa(1:this%m, this%m+1) = this%a%x
611 aa(this%m+1, 1:this%m) = this%a%x
612 aa(this%m+1, this%m+1) = - zeta/z
613
614 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
615
616 if (info .ne. 0) then
617 call neko_error("DGESV failed to solve the linear system in " // &
618 "mma_subsolve_dpip.")
619 end if
620
621
622 dlambda = bb(1:this%m)
623 dz = bb(this%m + 1)
624
625 ! based on eq(5.19)
626 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
627 dy = (-dely + dlambda) / (this%d%x + mu / y)
628
629 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
630 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
631 dmu = -mu + (epsi - mu * dy) / y
632 dzeta = -zeta + (epsi - zeta * dz) / z
633 ds = -s + (epsi - dlambda * s) / lambda
634
635 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
636 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
637
638 steg = 1.0_rp / maxval([ &
639 1.0_rp, &
640 -1.01_rp * dxx / xx, &
641 -1.01_rp * dx / (x - this%alpha%x), &
642 1.01_rp * dx / (this%beta%x - x) &
643 ])
644
645 ! Save the old values
646 xold = x
647 yold = y
648 zold = z
649 lambdaold = lambda
650 xsiold = xsi
651 etaold = eta
652 muold = mu
653 zetaold = zeta
654 sold = s
655
656 new_residual = 2.0_rp * residual_norm
657
658 ! Share the new_residual and steg values
659 call mpi_allreduce(mpi_in_place, steg, 1, &
660 mpi_real_precision, mpi_min, neko_comm, ierr)
661 call mpi_allreduce(mpi_in_place, new_residual, 1, &
662 mpi_real_precision, mpi_min, neko_comm, ierr)
663
664 ! The innermost loop to determine the suitable step length
665 ! using the Backtracking Line Search approach
666 itto = 0
667 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
668 itto = itto + 1
669
670 ! update the variables
671 x = xold + steg*dx
672 y = yold + steg*dy
673 z = zold + steg*dz
674
675 lambda = lambdaold + steg*dlambda
676 xsi = xsiold + steg*dxsi
677 eta = etaold + steg*deta
678 mu = muold + steg*dmu
679 zeta = zetaold + steg*dzeta
680 s = sold + steg*ds
681
682 ! Recompute the new_residual to see if this stepsize improves
683 ! the residue
684 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
685 / (this%upp%x - x)**2 &
686 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
687 / (x - this%low%x)**2 &
688 - xsi + eta
689
690 rey = this%c%x + this%d%x*y - lambda - mu
691 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
692
693 ! Accumulate sums for relambda (the term gi(x))
694 relambda = 0.0_rp
695 do i = 1, this%m
696 do j = 1, this%n
697 relambda(i) = relambda(i) &
698 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
699 + this%qij%x(i, j) / (x(j) - this%low%x(j))
700 end do
701 end do
702
703 call mpi_allreduce(mpi_in_place, relambda, this%m, &
704 mpi_real_precision, mpi_sum, neko_comm, ierr)
705
706 relambda = relambda - this%a%x*z - y + s - this%bi%x
707
708 rexsi = xsi * (x - this%alpha%x) - epsi
709 reeta = eta * (this%beta%x - x) - epsi
710 remu = mu * y - epsi
711 rezeta = zeta * z - epsi
712 res = lambda * s - epsi
713
714 ! Compute squared norms for the residuals
715 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
716 call mpi_allreduce(mpi_in_place, re_sq_norm, &
717 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
718
719 residual_small = [rey, rez, relambda, remu, rezeta, res]
720 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
721
722 steg = steg / 2.0_rp
723 end do
724 steg = 2.0_rp * steg ! Correction for the final division by 2
725
726 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
727
728 ! Update the maximum and norm of the residuals
729 residual_norm = new_residual
730 residual_max = maxval(abs(residual))
731 call mpi_allreduce(mpi_in_place, residual_max, 1, &
732 mpi_real_precision, mpi_max, neko_comm, ierr)
733 end do
734
735 epsi = 0.1_rp * epsi
736 end do
737
738 ! Save the new designx
739 this%xold2%x = this%xold1%x
740 this%xold1%x = designx
741 designx = x
742
743 !update the parameters of the MMA object nesessary to compute KKT residual
744 this%y%x = y
745 this%z = z
746 this%lambda%x = lambda
747 this%zeta = zeta
748 this%xsi%x = xsi
749 this%eta%x = eta
750 this%mu%x = mu
751 this%s%x = s
752
753 end subroutine mma_subsolve_dpip_cpu
754
757 subroutine mma_subsolve_dip_cpu(this, designx)
758 ! ------------------------------------------------------------------------ !
759 ! -------------------------------Dual Solver------------------------------ !
760 ! ------------------------------------------------------------------------ !
761 ! This implementation is based on: !
762 ! https://doi.org/10.1007/s00158-012-0869-2 !
763 ! Definition of the Lagrangian function: !
764 ! !
765 ! L(x, y, z, λ) = !
766 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
767 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
768 ! - sum_{i=1}^{m} λ_i * b_i !
769 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ] !
770 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z !
771 ! !
772 ! Breakdown of terms: !
773 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
774 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
775 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
776 ! !
777 ! Optimization problem if λ is given: !
778 ! !
779 ! Minimize L(x, y, z, λ) !
780 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
781 ! !
782 ! Since the problem is separable: !
783 ! Ψ(λ) = !
784 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
785 ! + min_z {L_z(z, λ) | z ≥ 0} !
786 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
787 ! !
788 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
789 ! !
790 ! ------------------------------------------------------------------------ !
791
792 class(mma_t), intent(inout) :: this
793 real(kind=rp), dimension(this%n), intent(inout) :: designx
794 ! Note that there is a local dummy "x" in this subroutine, thus, we call
795 ! the current design "designx" instead of just "x"
796 integer :: i, j, k, iter, ierr
797 real(kind=rp) :: epsi, residual_max, z, steg
798 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
799 relambda, remu, dlambda, dmu, gradlambda
800 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
801
802 ! To compute the Hessian based on eq(13)
803 ! https://doi.org/10.1007/s00158-012-0869-2
804
805 ! inverse of a diag matrix:
806 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
807 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
808 real(kind=rp), dimension(this%m,this%m) :: hess
809 real(kind=rp) :: hesstrace
810
811
812 ! using DGESV in lapack to solve
813 ! the linear system which needs the following parameters
814 integer :: info
815 integer, dimension(this%m+1) :: ipiv
816
817 ! Parameters for global communication
818 real(kind=rp) :: minimal_epsilon
819
820 integer :: nglobal
821
822 ! ------------------------------------------------------------------------ !
823 ! initial value for the parameters in the subsolve based on
824 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
825
826 epsi = 1.0_rp !100
827 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
828 y = 1.0_rp
829 z = 0.0_rp
830 lambda = max(1.0_rp, 0.5_rp * this%c%x)
831 mu = 1.0_rp !this parameter is eta in Niel's paper
832 ! note that mu in the paper translates to epsi in the code following the
833 ! same style as the Cpp code by Neils
834
835 call mpi_allreduce(this%n, nglobal, 1, &
836 mpi_integer, mpi_sum, neko_comm, ierr)
837
838 ! ------------------------------------------------------------------------ !
839 ! Computing the minimal epsilon and choose the most conservative one
840
841 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
842 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
843 mpi_real_precision, mpi_min, neko_comm, ierr)
844
845 ! ------------------------------------------------------------------------ !
846 ! The main loop of the dual-primal interior point method.
847
848 do while (epsi .gt. minimal_epsilon)
849
850 ! --------------------------------------------------------------------- !
851 ! Calculating residuals based on
852 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
853 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
854
855 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
856 pij => this%pij%x, qij => this%qij%x, &
857 low => this%low%x, upp => this%upp%x, &
858 alpha => this%alpha%x, beta => this%beta%x, &
859 c => this%c%x, d => this%d%x, &
860 a0 => this%a0, a => this%a%x, &
861 bi => this%bi%x)
862 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
863 ! the initial value of λ
864
865 ! Comput the value of y that minimizes L_y for the current λ
866 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
867 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
868 do i=1, this%m
869 if (abs(d(i)) < neko_eps) then
870 ! to avoid devision by zero in case d=0
871 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
872 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
873 else
874 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
875 end if
876 end do
877
878 ! Comput the value of z that minimizes L_z for the current λ
879 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
880 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
881 ! ensure z>=0
882 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
883
884 ! Comput the value of x that minimizes L_x for the current λ
885 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
886 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
887 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
888 pjlambda = (p0j + matmul(transpose(pij), lambda))
889 qjlambda = (q0j + matmul(transpose(qij), lambda))
890 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
891 (sqrt(pjlambda) + sqrt(qjlambda))
892
893 ! Ensure that x is feasible (alpha<=x<=beta)
894 x = merge(alpha, x, x .lt. alpha)
895 x = merge(beta, x, x .gt. beta)
896
897 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
898 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
899 matmul(qij, 1.0_rp / (x - low))
900
901 ! Global comminucation for relambda values
902 call mpi_allreduce(mpi_in_place, relambda, this%m, &
903 mpi_real_precision, mpi_sum, neko_comm, ierr)
904 relambda = relambda - bi - y - a * z + mu
905
906 ! Compute residual for mu (eta in the paper)
907 remu = mu * lambda - epsi
908
909 residual_max = maxval(abs([relambda, remu]))
910
911 ! ------------------------------------------------------------------- !
912 ! Internal loop
913 do iter = 1, this%max_iter
914
915 !Check the condition
916 if (residual_max .lt. epsi) exit
917
918 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
919
920 gradlambda = matmul(pij, 1.0_rp / (upp - x)) + &
921 matmul(qij, 1.0_rp/(x - low))
922
923 ! Global comminucation for gradlambda values
924 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
925 mpi_real_precision, mpi_sum, neko_comm, ierr)
926 gradlambda = gradlambda - bi - y - a * z
927
928 ! Update gradlambda as the right hand side for Newton's method(eq10)
929 gradlambda = - gradlambda - epsi / lambda
930
931 ! Computing the Hessian as in equation (13) in
932 !! https://doi.org/10.1007/s00158-012-0869-2
933
934 !--------------contributions of x terms to Hess--------------------!
935 ljjxinv= - 1.0_rp / ( (2.0_rp * pjlambda/(upp - x)**3) + &
936 (2.0_rp * qjlambda/(x - low)**3))
937
938
939 ! Remove the sensitivity for the active primal constraints
940 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < neko_eps)
941 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < neko_eps)
942
943 do i = 1, this%m
944 hijx(i,:) = pij(i,:) / (upp - x)**2 &
945 - qij(i,:) / (x - low)**2
946 end do
947
948 hess = 0.0_rp
949 ! Direct computation of the matrix multiplication
950 ! (for better performance)
951 do i = 1, this%m
952 do j = 1, this%m
953 ! Compute the (i, j) element of AA
954 do k = 1, this%n !this n is global
955 hess(i, j) = hess(i, j) &
956 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
957 end do
958 end do
959 end do
960
961 call mpi_allreduce(mpi_in_place, hess, &
962 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
963
964 !---------------contributions of z terms to Hess-------------------!
965 ! There is no contibution to the Hess from z terms as z terms are
966 ! linear w.r.t λ
967
968 !---------------contributions of y terms to Hess-------------------!
969 ! Only for inactive constraint, we consider contributions to Hess.
970 ! Note that if d(i) = 0, the y terms (just like z terms) will not
971 ! contribute to the Hessian matrix.
972 do i = 1, this%m
973 if (y(i) .gt. 0.0_rp) then
974 if (abs(d(i)) < neko_eps) then
975 ! Hess(i, i) = Hess(i, i) - 1.0_rp/1.0e-8_rp
976 else
977 hess(i, i) = hess(i, i) - 1.0_rp/d(i)
978 end if
979 end if
980 ! Based on eq(10), note the term (-\Omega \Lambda)
981 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
982 end do
983
984 ! Improve the robustness by stablizing the Hess using
985 ! Levenberg-Marquardt algorithm (heuristically)
986 hesstrace = 0.0_rp
987 do i=1, this%m
988 hesstrace = hesstrace + hess(i, i)
989 end do
990 do i=1, this%m
991 hess(i,i) = hess(i, i) - &
992 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
993 end do
994
995 call dgesv(this%m , 1, hess, this%m , ipiv, &
996 gradlambda, this%m, info)
997
998 if (info .ne. 0) then
999 call neko_error("DGESV failed to solve the linear system in " // &
1000 "mma_subsolve_dip.")
1001 end if
1002 dlambda = gradlambda
1003
1004 ! based on eq(11) for delta eta
1005 dmu = -mu + epsi / lambda - dlambda * mu / lambda
1006
1007 ! Compute the stepsize and update lambda and mu (eta in the paper)
1008
1009 steg = 1.005_rp
1010 do i = 1, this%m
1011 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1012 steg < -1.01_rp * dlambda(i) / lambda(i))
1013 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1014 steg, steg < -1.01_rp * dmu(i) / mu(i))
1015 end do
1016
1017 steg = 1.0_rp / steg
1018
1019 lambda = lambda + steg*dlambda
1020 mu = mu + steg*dmu
1021
1022 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1023 ! the updated values of λ
1024
1025 ! Comput the value of y that minimizes L_y for the current λ
1026 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
1027 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
1028 do i=1, this%m
1029 if (abs(d(i)) < neko_eps) then
1030 ! to avoid devision by zero in case d=0
1031 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
1032 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
1033 else
1034 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
1035 end if
1036 end do
1037
1038 ! Comput the value of z that minimizes L_z for the current λ
1039 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
1040 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
1041 ! ensure z>=0
1042 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
1043
1044 ! Comput the value of x that minimizes L_x for the current λ
1045 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1046 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1047 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1048 pjlambda = (p0j + matmul(transpose(pij), lambda))
1049 qjlambda = (q0j + matmul(transpose(qij), lambda))
1050 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1051 (sqrt(pjlambda) + sqrt(qjlambda))
1052
1053 ! Ensure that x is feasible (alpha<=x<=beta)
1054 x = merge(alpha, x, x .lt. alpha)
1055 x = merge(beta, x, x .gt. beta)
1056
1057 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1058 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
1059 matmul(qij, 1.0_rp / (x - low))
1060 ! Global comminucation for relambda values
1061 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1062 mpi_real_precision, mpi_sum, neko_comm, ierr)
1063 relambda = relambda - bi - y - a * z + mu
1064
1065 ! Compute residual for mu (eta in the paper)
1066 remu = mu * lambda - epsi
1067
1068 residual_max = maxval(abs([relambda, remu]))
1069 end do
1070 end associate
1071
1072 epsi = 0.1_rp * epsi
1073 end do
1074
1075 ! Save the new designx
1076 this%xold2%x = this%xold1%x
1077 this%xold1%x = designx
1078 designx = x
1079
1080 !update the parameters of the MMA object nesessary to compute KKT residual
1081
1082 this%y%x = y
1083 this%z = z
1084 this%lambda%x = lambda
1085 this%mu%x = mu
1086 end subroutine mma_subsolve_dip_cpu
1087
1088end submodule mma_cpu