Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1! Copyright (c) 2025, The Neko-TOP Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32
33submodule(mma) mma_cpu
34 use lapack_interfaces, only: dgesv
35 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
36 use comm, only: neko_comm, pe_rank, mpi_real_precision
37 use math, only: neko_eps
38 implicit none
39
40contains
41
42 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
43 ! ----------------------------------------------------- !
44 ! Update the design variable x by solving the convex !
45 ! approximation of the problem. !
46 ! !
47 ! This subroutine is called in each iteration of the !
48 ! optimization loop !
49 ! ----------------------------------------------------- !
50 class(mma_t), intent(inout) :: this
51 integer, intent(in) :: iter
52 real(kind=rp), dimension(this%n), intent(inout) :: x
53 real(kind=rp), dimension(this%n), intent(in) :: df0dx
54 real(kind=rp), dimension(this%m), intent(in) :: fval
55 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
56
57 if (.not. this%is_initialized) then
58 call neko_error("The MMA object is not initialized.")
59 end if
60
61 ! generate a convex approximation of the problem
62 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
63
64 !solve the approximation problem using interior point method
65 if (this%subsolver .eq. "dip") then
66 call mma_subsolve_dip_cpu(this, x)
67 else
68 call mma_subsolve_dpip_cpu(this, x)
69 end if
70
71 this%is_updated = .true.
72 end subroutine mma_update_cpu
73
75 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
76 ! ----------------------------------------------------- !
77 ! Compute the KKT condition right hand side for a given !
78 ! designx x and set the max and norm values of the !
79 ! residue of KKT system to this%residumax and !
80 ! this%residunorm. !
81 ! !
82 ! The left hand sides of the KKT conditions are computed!
83 ! for the following nonlinear programming problem: !
84 ! Minimize f_0(x) + a_0*z + !
85 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
86 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
87 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
88 ! z >= 0, y_i >= 0, i = 1,...,m !
89 ! !
90 ! !
91 ! Note that before calling this function, the function !
92 ! values (f0val, fval, dfdx, ...) should be updated !
93 ! using the new x values. !
94 ! ----------------------------------------------------- !
95 class(mma_t), intent(inout) :: this
96 real(kind=rp), dimension(this%n), intent(in) :: x
97 real(kind=rp), dimension(this%n), intent(in) :: df0dx
98 real(kind=rp), dimension(this%m), intent(in) :: fval
99 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
100
101 if (this%subsolver .eq. "dip") then
102 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
103 else
104 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
105 end if
106 end subroutine mma_kkt_cpu
107
109 ! point method (dpip) subsolve of MMA algorithm.
110 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
111 ! ----------------------------------------------------- !
112 ! Compute the KKT condition right hand side for a given !
113 ! designx x and set the max and norm values of the !
114 ! residue of KKT system to this%residumax and !
115 ! this%residunorm. !
116 ! !
117 ! The left hand sides of the KKT conditions are computed!
118 ! for the following nonlinear programming problem: !
119 ! Minimize f_0(x) + a_0*z + !
120 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
121 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
122 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
123 ! z >= 0, y_i >= 0, i = 1,...,m !
124 ! !
125 ! !
126 ! Note that before calling this function, the function !
127 ! values (f0val, fval, dfdx, ...) should be updated !
128 ! using the new x values. !
129 ! ----------------------------------------------------- !
130 class(mma_t), intent(inout) :: this
131 real(kind=rp), dimension(this%n), intent(in) :: x
132 real(kind=rp), dimension(this%n), intent(in) :: df0dx
133 real(kind=rp), dimension(this%m), intent(in) :: fval
134 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
135
136 real(kind=rp) :: rez, rezeta
137 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
138 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
139 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
140
141 real(kind=rp), dimension(4*this%m+2) :: residual_small
142 integer :: ierr
143 real(kind=rp) :: re_sq_norm
144
145 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
146 - this%xsi%x + this%eta%x
147 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
148 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
149
150 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
151 rexsi = this%xsi%x * (x - this%xmin%x)
152 reeta = this%eta%x * (this%xmax%x - x)
153 remu = this%mu%x * this%y%x
154 rezeta = this%zeta * this%z
155 res = this%lambda%x * this%s%x
156
157 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
158 residual_small = [rey, rez, relambda, remu, rezeta, res]
159
160 this%residumax = maxval(abs(residual))
161 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
162
163 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
164 mpi_real_precision, mpi_max, neko_comm, ierr)
165
166 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
167 mpi_real_precision, mpi_sum, neko_comm, ierr)
168
169 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
170 end subroutine mma_dpip_kkt_cpu
171
173 ! point method (dip) subsolve of MMA algorithm.
174 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
175 ! ----------------------------------------------------- !
176 ! Compute the KKT condition right hand side for a given !
177 ! designx x and set the max and norm values of the !
178 ! residue of KKT system to this%residumax and !
179 ! this%residunorm. !
180 ! !
181 ! The left hand sides of the KKT conditions are computed!
182 ! for the following nonlinear programming problem: !
183 ! Minimize f_0(x) + a_0*z + !
184 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
185 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
186 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
187 ! z >= 0, y_i >= 0, i = 1,...,m !
188 ! !
189 ! !
190 ! Note that before calling this function, the function !
191 ! values (f0val, fval, dfdx, ...) should be updated !
192 ! using the new x values. !
193 ! ----------------------------------------------------- !
194 class(mma_t), intent(inout) :: this
195 real(kind=rp), dimension(this%n), intent(in) :: x
196 real(kind=rp), dimension(this%n), intent(in) :: df0dx
197 real(kind=rp), dimension(this%m), intent(in) :: fval
198 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
199
200 real(kind=rp), dimension(this%m) :: relambda, remu
201 real(kind=rp), dimension(2*this%m) :: residual
202
203
204 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
205 ! Compute residual for mu (eta in the paper)
206 remu = this%lambda%x * this%mu%x
207
208 residual = abs([relambda, remu])
209 this%residumax = maxval(residual)
210 this%residunorm = norm2(residual)
211
212 end subroutine mma_dip_kkt_cpu
213
214 !============================================================================!
215 ! private internal subroutines
216
218 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
219 ! ----------------------------------------------------- !
220 ! Generate the approximation sub problem by computing !
221 ! the lower and upper asymtotes and the other necessary !
222 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
223 ! ----------------------------------------------------- !
224 class(mma_t), intent(inout) :: this
225 real(kind=rp), dimension(this%n), intent(in) :: x
226 real(kind=rp), dimension(this%n), intent(in) :: df0dx
227 real(kind=rp), dimension(this%m), intent(in) :: fval
228 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
229 integer, intent(in) :: iter
230 integer :: i, j, ierr
231 real(kind=rp), dimension(this%n) :: x_diff
232 real(kind=rp) :: asy_factor
233
234 x_diff = this%xmax%x - this%xmin%x
235
236 ! ------------------------------------------------------------------------ !
237 ! Setup the current asymptotes
238 associate(low => this%low%x, upp => this%upp%x, &
239 x_1 => this%xold1%x, x_2 => this%xold2%x)
240
241 if (iter .lt. 3) then
242 ! Initialize the lower and upper asymptotes
243 low = x - this%asyinit * x_diff
244 upp = x + this%asyinit * x_diff
245 else
246 do j = 1, this%n
247 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
248 asy_factor = this%asydecr
249 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
250 asy_factor = this%asyincr
251 else
252 asy_factor = 1.0_rp
253 end if
254
255 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
256 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
257 end do
258
259
260 ! Setting a minimum and maximum for the low and upp
261 ! asymptotes (eq3.9)
262 low = max(low, x - 10.0_rp * x_diff)
263 low = min(low, x - 0.01_rp * x_diff)
264
265 upp = min(upp, x + 10.0_rp * x_diff)
266 upp = max(upp, x + 0.01_rp * x_diff)
267 end if
268
269 end associate
270
271
272 ! ------------------------------------------------------------------------ !
273 ! Set the the bounds and coefficients for the approximation
274 ! the move bounds (alpha and beta) are slightly more restrictive
275 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
276 ! also check
277 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
278 ! eq (2.8) and (2.9)
279
280 associate(alpha => this%alpha%x, beta => this%beta%x, &
281 xmin => this%xmin%x, xmax => this%xmax%x, &
282 low => this%low%x, upp => this%upp%x, x => x)
283
284 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
285 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
286 end associate
287
288 ! ------------------------------------------------------------------------ !
289 ! Calculate p0j, q0j, pij, qij
290 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
291
292 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
293 pij => this%pij%x, qij => this%qij%x, &
294 low => this%low%x, upp => this%upp%x)
295
296 p0j = ( &
297 1.001_rp * max(df0dx, 0.0_rp) &
298 + 0.001_rp * max(-df0dx, 0.0_rp) &
299 + 0.00001_rp / max(x_diff, 0.00001_rp) &
300 ) * (upp - x)**2
301
302 q0j = ( &
303 0.001_rp * max(df0dx, 0.0_rp) &
304 + 1.001_rp * max(-df0dx, 0.0_rp) &
305 + 0.00001_rp / max(x_diff, 0.00001_rp)&
306 ) * (x - low)**2
307
308 do j = 1, this%n
309 do i = 1, this%m
310 pij(i, j) = ( &
311 1.001_rp * max(dfdx(i, j), 0.0_rp) &
312 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
313 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
314 ) * (upp(j) - x(j))**2
315
316 qij(i, j) = ( &
317 0.001_rp * max(dfdx(i, j), 0.0_rp) &
318 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
319 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
320 ) * (x(j) - low(j))**2
321 end do
322 end do
323
324 end associate
325
326 ! ------------------------------------------------------------------------ !
327 ! Computing bi as defined in page 5
328
329 associate(bi => this%bi%x, &
330 pij => this%pij%x, qij => this%qij%x, &
331 low => this%low%x, upp => this%upp%x)
332
333 bi = 0.0_rp
334 do i = 1, this%m
335 do j = 1, this%n
336 bi(i) = bi(i) &
337 + pij(i, j) / (upp(j) - x(j)) &
338 + qij(i, j) / (x(j) - low(j))
339 end do
340 end do
341
342 call mpi_allreduce(mpi_in_place, bi, this%m, &
343 mpi_real_precision, mpi_sum, neko_comm, ierr)
344 bi = bi - fval
345
346 end associate
347 end subroutine mma_gensub_cpu
348
351 subroutine mma_subsolve_dpip_cpu(this, designx)
352 ! ------------------------------------------------------- !
353 ! Dual-primal interior point method using Newton's step !
354 ! to solve MMA sub problem. !
355 ! A Backtracking Line Search approach is used to compute !
356 ! the step size; starting with the full Newton's step !
357 ! (delta = 1) and dividing by 2 until we have a step size !
358 ! that leads to a feasible point while ensuring a !
359 ! decrease in the residue. !
360 ! ------------------------------------------------------- !
361
362 class(mma_t), intent(inout) :: this
363 real(kind=rp), dimension(this%n), intent(inout) :: designx
364 ! Note that there is a local dummy "x" in this subroutine, thus, we call
365 ! the current design "designx" instead of just "x"
366 integer :: i, j, k, iter, itto, ierr
367 real(kind=rp) :: epsi, residual_max, residual_norm, &
368 z, zeta, rez, rezeta, &
369 delz, dz, dzeta, &
370 steg, zold, zetaold, new_residual
371 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
372 rey, relambda, remu, res, &
373 dely, dellambda, &
374 dy, dlambda, ds, dmu, &
375 yold, lambdaold, sold, muold
376 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
377 rex, rexsi, reeta, &
378 delx, diagx, dx, dxsi, deta, &
379 xold, xsiold, etaold
380 real(kind=rp), dimension(4*this%m + 2) :: residual_small
381 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
382 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
383
384 real(kind=rp), dimension(this%m, this%n) :: gg
385 real(kind=rp), dimension(this%m+1) :: bb
386 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
387 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
388
389 ! using DGESV in lapack to solve
390 ! the linear system which needs the following parameters
391 integer :: info
392 integer, dimension(this%m+1) :: ipiv
393
394 ! Parameters for global communication
395 real(kind=rp) :: re_sq_norm
396 real(kind=rp) :: minimal_epsilon
397
398 integer :: nglobal
399
400 ! ------------------------------------------------------------------------ !
401 ! initial value for the parameters in the subsolve based on
402 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
403
404 epsi = 1.0_rp !100
405 x = 0.5_rp * (this%alpha%x + this%beta%x)
406 y = 1.0_rp
407 z = 1.0_rp
408 zeta = 1.0_rp
409 lambda = 1.0_rp
410 s = 1.0_rp
411 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
412 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
413 mu = max(1.0_rp, 0.5_rp * this%c%x)
414
415 call mpi_allreduce(this%n, nglobal, 1, &
416 mpi_integer, mpi_sum, neko_comm, ierr)
417
418 ! ------------------------------------------------------------------------ !
419 ! Computing the minimal epsilon and choose the most conservative one
420
421 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
422 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
423 mpi_real_precision, mpi_min, neko_comm, ierr)
424
425 ! ------------------------------------------------------------------------ !
426 ! The main loop of the dual-primal interior point method.
427
428 do while (epsi .gt. minimal_epsilon)
429
430 ! --------------------------------------------------------------------- !
431 ! Calculating residuals based on
432 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
433 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
434
435 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
436 pij => this%pij%x, qij => this%qij%x, &
437 low => this%low%x, upp => this%upp%x, &
438 alpha => this%alpha%x, beta => this%beta%x, &
439 c => this%c%x, d => this%d%x, &
440 a0 => this%a0, a => this%a%x, &
441 bi => this%bi%x)
442
443 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
444 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
445 - xsi + eta
446
447 rey = c + d * y - lambda - mu
448 rez = a0 - zeta - dot_product(lambda, a)
449
450 relambda = 0.0_rp
451 do i = 1, this%m
452 do j = 1, this%n
453 ! Accumulate sums for relambda (the term gi(x))
454 relambda(i) = relambda(i) &
455 + pij(i, j) / (upp(j) - x(j)) &
456 + qij(i, j) / (x(j) - low(j))
457 end do
458 end do
459
460 end associate
461
462 ! --------------------------------------------------------------------- !
463 ! Computing the norm of the residuals
464
465 ! Complete the computations of lambda residuals
466 call mpi_allreduce(mpi_in_place, relambda, this%m, &
467 mpi_real_precision, mpi_sum, neko_comm, ierr)
468 relambda = relambda - this%a%x*z - y + s - this%bi%x
469
470 rexsi = xsi * (x - this%alpha%x) - epsi
471 reeta = eta * (this%beta%x - x) - epsi
472 remu = mu * y - epsi
473 rezeta = zeta * z - epsi
474 res = lambda * s - epsi
475
476 ! Setup vectors of residuals and their norms
477 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
478 residual_small = [rey, rez, relambda, remu, rezeta, res]
479
480 residual_max = maxval(abs(residual))
481 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
482
483 call mpi_allreduce(mpi_in_place, residual_max, 1, &
484 mpi_real_precision, mpi_max, neko_comm, ierr)
485
486 call mpi_allreduce(mpi_in_place, re_sq_norm, &
487 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
488
489 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
490
491 ! --------------------------------------------------------------------- !
492 ! Internal loop
493
494 do iter = 1, this%max_iter
495
496 !Check the condition
497 if (residual_max .lt. epsi) exit
498
499 delx = 0.0_rp
500 do j = 1, this%n
501 do i = 1, this%m
502 delx(j) = delx(j) &
503 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
504 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
505 end do
506 end do
507
508 delx = delx &
509 + this%p0j%x / (this%upp%x - x)**2 &
510 - this%q0j%x / (x - this%low%x)**2 &
511 - epsi / (x - this%alpha%x) &
512 + epsi / (this%beta%x - x)
513
514 dely = this%c%x + this%d%x * y - lambda - epsi / y
515 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
516
517 ! Accumulate sums for dellambda (the term gi(x))
518 dellambda = 0.0_rp
519 do i = 1, this%m
520 do j = 1, this%n
521 dellambda(i) = dellambda(i) &
522 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
523 + this%qij%x(i, j) / (x(j) - this%low%x(j))
524 end do
525 end do
526
527 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
528 mpi_real_precision, mpi_sum, neko_comm, ierr)
529
530 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
531
532 do i = 1, this%m
533 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
534 - this%qij%x(i,:) / (x - this%low%x)**2
535 end do
536
537 diagx = &
538 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
539 / (this%upp%x - x)**3 &
540 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
541 / (x - this%low%x)**3
542
543 diagx = 2.0_rp * diagx &
544 + xsi / (x - this%alpha%x) &
545 + eta / (this%beta%x - x)
546
547
548 !Here we only consider the case m<n in the matlab code
549 !assembling the right hand side matrix based on eq(5.20)
550 ! bb = [dellambda + dely/(this%d%x + &
551 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
552
553 !--------------------------------------------------------------------!
554 ! for MPI computation of bb
555
556 bb = 0.0_rp
557 do i = 1, this%m
558 do j = 1, this%n
559 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
560 end do
561 end do
562
563 call mpi_allreduce(mpi_in_place, bb, this%m, &
564 mpi_real_precision, mpi_sum, neko_comm, ierr)
565
566 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
567 bb(this%m + 1) = delz
568
569 !--------------------------------------------------------------------!
570 ! assembling the coefficients matrix AA based on eq(5.20)
571 ! AA(1:this%m,1:this%m) = &
572 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
573 ! !update diag(AA)
574 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
575 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
576
577 aa = 0.0_rp
578 ! Direct computation of the matrix multiplication
579 ! (for better performance)
580 do i = 1, this%m
581 do j = 1, this%m
582 ! Compute the (i, j) element of AA
583 do k = 1, this%n !this n is global
584 aa(i, j) = aa(i, j) &
585 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
586 end do
587 end do
588 end do
589
590 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
591
592 call mpi_allreduce(mpi_in_place, aa_buffer, &
593 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
594
595 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
596
597 do i = 1, this%m
598 ! update the diag AA
599 aa(i, i) = aa(i, i) &
600 + s(i) / lambda(i) &
601 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
602 end do
603
604 aa(1:this%m, this%m+1) = this%a%x
605 aa(this%m+1, 1:this%m) = this%a%x
606 aa(this%m+1, this%m+1) = - zeta/z
607
608 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
609
610 if (info .ne. 0) then
611 call neko_error("DGESV failed to solve the linear system in " // &
612 "mma_subsolve_dpip.")
613 end if
614
615
616 dlambda = bb(1:this%m)
617 dz = bb(this%m + 1)
618
619 ! based on eq(5.19)
620 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
621 dy = (-dely + dlambda) / (this%d%x + mu / y)
622
623 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
624 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
625 dmu = -mu + (epsi - mu * dy) / y
626 dzeta = -zeta + (epsi - zeta * dz) / z
627 ds = -s + (epsi - dlambda * s) / lambda
628
629 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
630 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
631
632 steg = 1.0_rp / maxval([ &
633 1.0_rp, &
634 -1.01_rp * dxx / xx, &
635 -1.01_rp * dx / (x - this%alpha%x), &
636 1.01_rp * dx / (this%beta%x - x) &
637 ])
638
639 ! Save the old values
640 xold = x
641 yold = y
642 zold = z
643 lambdaold = lambda
644 xsiold = xsi
645 etaold = eta
646 muold = mu
647 zetaold = zeta
648 sold = s
649
650 new_residual = 2.0_rp * residual_norm
651
652 ! Share the new_residual and steg values
653 call mpi_allreduce(mpi_in_place, steg, 1, &
654 mpi_real_precision, mpi_min, neko_comm, ierr)
655 call mpi_allreduce(mpi_in_place, new_residual, 1, &
656 mpi_real_precision, mpi_min, neko_comm, ierr)
657
658 ! The innermost loop to determine the suitable step length
659 ! using the Backtracking Line Search approach
660 itto = 0
661 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
662 itto = itto + 1
663
664 ! update the variables
665 x = xold + steg*dx
666 y = yold + steg*dy
667 z = zold + steg*dz
668
669 lambda = lambdaold + steg*dlambda
670 xsi = xsiold + steg*dxsi
671 eta = etaold + steg*deta
672 mu = muold + steg*dmu
673 zeta = zetaold + steg*dzeta
674 s = sold + steg*ds
675
676 ! Recompute the new_residual to see if this stepsize improves
677 ! the residue
678 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
679 / (this%upp%x - x)**2 &
680 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
681 / (x - this%low%x)**2 &
682 - xsi + eta
683
684 rey = this%c%x + this%d%x*y - lambda - mu
685 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
686
687 ! Accumulate sums for relambda (the term gi(x))
688 relambda = 0.0_rp
689 do i = 1, this%m
690 do j = 1, this%n
691 relambda(i) = relambda(i) &
692 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
693 + this%qij%x(i, j) / (x(j) - this%low%x(j))
694 end do
695 end do
696
697 call mpi_allreduce(mpi_in_place, relambda, this%m, &
698 mpi_real_precision, mpi_sum, neko_comm, ierr)
699
700 relambda = relambda - this%a%x*z - y + s - this%bi%x
701
702 rexsi = xsi * (x - this%alpha%x) - epsi
703 reeta = eta * (this%beta%x - x) - epsi
704 remu = mu * y - epsi
705 rezeta = zeta * z - epsi
706 res = lambda * s - epsi
707
708 ! Compute squared norms for the residuals
709 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
710 call mpi_allreduce(mpi_in_place, re_sq_norm, &
711 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
712
713 residual_small = [rey, rez, relambda, remu, rezeta, res]
714 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
715
716 steg = steg / 2.0_rp
717 end do
718 steg = 2.0_rp * steg ! Correction for the final division by 2
719
720 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
721
722 ! Update the maximum and norm of the residuals
723 residual_norm = new_residual
724 residual_max = maxval(abs(residual))
725 call mpi_allreduce(mpi_in_place, residual_max, 1, &
726 mpi_real_precision, mpi_max, neko_comm, ierr)
727 end do
728
729 epsi = 0.1_rp * epsi
730 end do
731
732 ! Save the new designx
733 this%xold2%x = this%xold1%x
734 this%xold1%x = designx
735 designx = x
736
737 !update the parameters of the MMA object nesessary to compute KKT residual
738 this%y%x = y
739 this%z = z
740 this%lambda%x = lambda
741 this%zeta = zeta
742 this%xsi%x = xsi
743 this%eta%x = eta
744 this%mu%x = mu
745 this%s%x = s
746
747 end subroutine mma_subsolve_dpip_cpu
748
751 subroutine mma_subsolve_dip_cpu(this, designx)
752 ! ------------------------------------------------------------------------ !
753 ! -------------------------------Dual Solver------------------------------ !
754 ! ------------------------------------------------------------------------ !
755 ! This implementation is based on: !
756 ! https://doi.org/10.1007/s00158-012-0869-2 !
757 ! Definition of the Lagrangian function: !
758 ! !
759 ! L(x, y, z, λ) = !
760 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
761 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
762 ! - sum_{i=1}^{m} λ_i * b_i !
763 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ] !
764 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z !
765 ! !
766 ! Breakdown of terms: !
767 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
768 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
769 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
770 ! !
771 ! Optimization problem if λ is given: !
772 ! !
773 ! Minimize L(x, y, z, λ) !
774 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
775 ! !
776 ! Since the problem is separable: !
777 ! Ψ(λ) = !
778 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
779 ! + min_z {L_z(z, λ) | z ≥ 0} !
780 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
781 ! !
782 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
783 ! !
784 ! ------------------------------------------------------------------------ !
785
786 class(mma_t), intent(inout) :: this
787 real(kind=rp), dimension(this%n), intent(inout) :: designx
788 ! Note that there is a local dummy "x" in this subroutine, thus, we call
789 ! the current design "designx" instead of just "x"
790 integer :: i, j, k, iter, ierr
791 real(kind=rp) :: epsi, residual_max, z, steg
792 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
793 relambda, remu, dlambda, dmu, gradlambda
794 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
795
796 ! To compute the Hessian based on eq(13)
797 ! https://doi.org/10.1007/s00158-012-0869-2
798
799 ! inverse of a diag matrix:
800 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
801 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
802 real(kind=rp), dimension(this%m,this%m) :: hess
803 real(kind=rp) :: hesstrace
804
805
806 ! using DGESV in lapack to solve
807 ! the linear system which needs the following parameters
808 integer :: info
809 integer, dimension(this%m+1) :: ipiv
810
811 ! Parameters for global communication
812 real(kind=rp) :: minimal_epsilon
813
814 integer :: nglobal
815
816 ! ------------------------------------------------------------------------ !
817 ! initial value for the parameters in the subsolve based on
818 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
819
820 epsi = 1.0_rp !100
821 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
822 y = 1.0_rp
823 z = 0.0_rp
824 lambda = max(1.0_rp, 0.5_rp * this%c%x)
825 mu = 1.0_rp !this parameter is eta in Niel's paper
826 ! note that mu in the paper translates to epsi in the code following the
827 ! same style as the Cpp code by Neils
828
829 call mpi_allreduce(this%n, nglobal, 1, &
830 mpi_integer, mpi_sum, neko_comm, ierr)
831
832 ! ------------------------------------------------------------------------ !
833 ! Computing the minimal epsilon and choose the most conservative one
834
835 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
836 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
837 mpi_real_precision, mpi_min, neko_comm, ierr)
838
839 ! ------------------------------------------------------------------------ !
840 ! The main loop of the dual-primal interior point method.
841
842 do while (epsi .gt. minimal_epsilon)
843
844 ! --------------------------------------------------------------------- !
845 ! Calculating residuals based on
846 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
847 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
848
849 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
850 pij => this%pij%x, qij => this%qij%x, &
851 low => this%low%x, upp => this%upp%x, &
852 alpha => this%alpha%x, beta => this%beta%x, &
853 c => this%c%x, d => this%d%x, &
854 a0 => this%a0, a => this%a%x, &
855 bi => this%bi%x)
856 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
857 ! the initial value of λ
858
859 ! Comput the value of y that minimizes L_y for the current λ
860 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
861 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
862 do i=1, this%m
863 if (abs(d(i)) < neko_eps) then
864 ! to avoid devision by zero in case d=0
865 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
866 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
867 else
868 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
869 end if
870 end do
871
872 ! Comput the value of z that minimizes L_z for the current λ
873 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
874 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
875 ! ensure z>=0
876 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
877
878 ! Comput the value of x that minimizes L_x for the current λ
879 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
880 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
881 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
882 pjlambda = (p0j + matmul(transpose(pij), lambda))
883 qjlambda = (q0j + matmul(transpose(qij), lambda))
884 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
885 (sqrt(pjlambda) + sqrt(qjlambda))
886
887 ! Ensure that x is feasible (alpha<=x<=beta)
888 x = merge(alpha, x, x .lt. alpha)
889 x = merge(beta, x, x .gt. beta)
890
891 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
892 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
893 matmul(qij, 1.0_rp / (x - low))
894
895 ! Global comminucation for relambda values
896 call mpi_allreduce(mpi_in_place, relambda, this%m, &
897 mpi_real_precision, mpi_sum, neko_comm, ierr)
898 relambda = relambda - bi - y - a * z + mu
899
900 ! Compute residual for mu (eta in the paper)
901 remu = mu * lambda - epsi
902
903 residual_max = maxval(abs([relambda, remu]))
904
905 ! ------------------------------------------------------------------- !
906 ! Internal loop
907 do iter = 1, this%max_iter
908
909 !Check the condition
910 if (residual_max .lt. epsi) exit
911
912 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
913
914 gradlambda = matmul(pij, 1.0_rp / (upp - x)) + &
915 matmul(qij, 1.0_rp/(x - low))
916
917 ! Global comminucation for gradlambda values
918 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
919 mpi_real_precision, mpi_sum, neko_comm, ierr)
920 gradlambda = gradlambda - bi - y - a * z
921
922 ! Update gradlambda as the right hand side for Newton's method(eq10)
923 gradlambda = - gradlambda - epsi / lambda
924
925 ! Computing the Hessian as in equation (13) in
926 !! https://doi.org/10.1007/s00158-012-0869-2
927
928 !--------------contributions of x terms to Hess--------------------!
929 ljjxinv= - 1.0_rp / ( (2.0_rp * pjlambda/(upp - x)**3) + &
930 (2.0_rp * qjlambda/(x - low)**3))
931
932
933 ! Remove the sensitivity for the active primal constraints
934 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < neko_eps)
935 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < neko_eps)
936
937 do i = 1, this%m
938 hijx(i,:) = pij(i,:) / (upp - x)**2 &
939 - qij(i,:) / (x - low)**2
940 end do
941
942 hess = 0.0_rp
943 ! Direct computation of the matrix multiplication
944 ! (for better performance)
945 do i = 1, this%m
946 do j = 1, this%m
947 ! Compute the (i, j) element of AA
948 do k = 1, this%n !this n is global
949 hess(i, j) = hess(i, j) &
950 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
951 end do
952 end do
953 end do
954
955 call mpi_allreduce(mpi_in_place, hess, &
956 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
957
958 !---------------contributions of z terms to Hess-------------------!
959 ! There is no contibution to the Hess from z terms as z terms are
960 ! linear w.r.t λ
961
962 !---------------contributions of y terms to Hess-------------------!
963 ! Only for inactive constraint, we consider contributions to Hess.
964 ! Note that if d(i) = 0, the y terms (just like z terms) will not
965 ! contribute to the Hessian matrix.
966 do i = 1, this%m
967 if (y(i) .gt. 0.0_rp) then
968 if (abs(d(i)) < neko_eps) then
969 ! Hess(i, i) = Hess(i, i) - 1.0_rp/1.0e-8_rp
970 else
971 hess(i, i) = hess(i, i) - 1.0_rp/d(i)
972 end if
973 end if
974 ! Based on eq(10), note the term (-\Omega \Lambda)
975 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
976 end do
977
978 ! Improve the robustness by stablizing the Hess using
979 ! Levenberg-Marquardt algorithm (heuristically)
980 hesstrace = 0.0_rp
981 do i=1, this%m
982 hesstrace = hesstrace + hess(i, i)
983 end do
984 do i=1, this%m
985 hess(i,i) = hess(i, i) - &
986 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
987 end do
988
989 call dgesv(this%m , 1, hess, this%m , ipiv, &
990 gradlambda, this%m, info)
991
992 if (info .ne. 0) then
993 call neko_error("DGESV failed to solve the linear system in " // &
994 "mma_subsolve_dip.")
995 end if
996 dlambda = gradlambda
997
998 ! based on eq(11) for delta eta
999 dmu = -mu + epsi / lambda - dlambda * mu / lambda
1000
1001 ! Compute the stepsize and update lambda and mu (eta in the paper)
1002
1003 steg = 1.005_rp
1004 do i = 1, this%m
1005 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1006 steg < -1.01_rp * dlambda(i) / lambda(i))
1007 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1008 steg, steg < -1.01_rp * dmu(i) / mu(i))
1009 end do
1010
1011 steg = 1.0_rp / steg
1012
1013 lambda = lambda + steg*dlambda
1014 mu = mu + steg*dmu
1015
1016 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1017 ! the updated values of λ
1018
1019 ! Comput the value of y that minimizes L_y for the current λ
1020 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
1021 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
1022 do i=1, this%m
1023 if (abs(d(i)) < neko_eps) then
1024 ! to avoid devision by zero in case d=0
1025 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
1026 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
1027 else
1028 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
1029 end if
1030 end do
1031
1032 ! Comput the value of z that minimizes L_z for the current λ
1033 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
1034 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
1035 ! ensure z>=0
1036 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
1037
1038 ! Comput the value of x that minimizes L_x for the current λ
1039 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1040 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1041 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1042 pjlambda = (p0j + matmul(transpose(pij), lambda))
1043 qjlambda = (q0j + matmul(transpose(qij), lambda))
1044 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1045 (sqrt(pjlambda) + sqrt(qjlambda))
1046
1047 ! Ensure that x is feasible (alpha<=x<=beta)
1048 x = merge(alpha, x, x .lt. alpha)
1049 x = merge(beta, x, x .gt. beta)
1050
1051 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1052 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
1053 matmul(qij, 1.0_rp / (x - low))
1054 ! Global comminucation for relambda values
1055 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1056 mpi_real_precision, mpi_sum, neko_comm, ierr)
1057 relambda = relambda - bi - y - a * z + mu
1058
1059 ! Compute residual for mu (eta in the paper)
1060 remu = mu * lambda - epsi
1061
1062 residual_max = maxval(abs([relambda, remu]))
1063 end do
1064 end associate
1065
1066 epsi = 0.1_rp * epsi
1067 end do
1068
1069 ! Save the new designx
1070 this%xold2%x = this%xold1%x
1071 this%xold1%x = designx
1072 designx = x
1073
1074 !update the parameters of the MMA object nesessary to compute KKT residual
1075
1076 this%y%x = y
1077 this%z = z
1078 this%lambda%x = lambda
1079 this%mu%x = mu
1080 end subroutine mma_subsolve_dip_cpu
1081
1082end submodule mma_cpu