Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1! Copyright (c) 2025, The Neko-TOP Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32
33submodule(mma) mma_cpu
34 use lapack_interfaces, only: dgesv
35 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
36 use comm, only: neko_comm, pe_rank, mpi_real_precision
37 implicit none
38
39contains
40
41 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
42 ! ----------------------------------------------------- !
43 ! Update the design variable x by solving the convex !
44 ! approximation of the problem. !
45 ! !
46 ! This subroutine is called in each iteration of the !
47 ! optimization loop !
48 ! ----------------------------------------------------- !
49 class(mma_t), intent(inout) :: this
50 integer, intent(in) :: iter
51 real(kind=rp), dimension(this%n), intent(inout) :: x
52 real(kind=rp), dimension(this%n), intent(in) :: df0dx
53 real(kind=rp), dimension(this%m), intent(in) :: fval
54 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
55
56 if (.not. this%is_initialized) then
57 call neko_error("The MMA object is not initialized.")
58 end if
59
60 ! generate a convex approximation of the problem
61 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
62
63 !solve the approximation problem using interior point method
64 if (this%subsolver .eq. "dip") then
65 call mma_subsolve_dip_cpu(this, x)
66 else
67 call mma_subsolve_dpip_cpu(this, x)
68 end if
69
70 this%is_updated = .true.
71 end subroutine mma_update_cpu
72
74 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
75 ! ----------------------------------------------------- !
76 ! Compute the KKT condition right hand side for a given !
77 ! designx x and set the max and norm values of the !
78 ! residue of KKT system to this%residumax and !
79 ! this%residunorm. !
80 ! !
81 ! The left hand sides of the KKT conditions are computed!
82 ! for the following nonlinear programming problem: !
83 ! Minimize f_0(x) + a_0*z + !
84 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
85 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
86 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
87 ! z >= 0, y_i >= 0, i = 1,...,m !
88 ! !
89 ! !
90 ! Note that before calling this function, the function !
91 ! values (f0val, fval, dfdx, ...) should be updated !
92 ! using the new x values. !
93 ! ----------------------------------------------------- !
94 class(mma_t), intent(inout) :: this
95 real(kind=rp), dimension(this%n), intent(in) :: x
96 real(kind=rp), dimension(this%n), intent(in) :: df0dx
97 real(kind=rp), dimension(this%m), intent(in) :: fval
98 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
99
100 if (this%subsolver .eq. "dip") then
101 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
102 else
103 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
104 end if
105 end subroutine mma_kkt_cpu
106
108 ! point method (dpip) subsolve of MMA algorithm.
109 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
110 ! ----------------------------------------------------- !
111 ! Compute the KKT condition right hand side for a given !
112 ! designx x and set the max and norm values of the !
113 ! residue of KKT system to this%residumax and !
114 ! this%residunorm. !
115 ! !
116 ! The left hand sides of the KKT conditions are computed!
117 ! for the following nonlinear programming problem: !
118 ! Minimize f_0(x) + a_0*z + !
119 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
120 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
121 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
122 ! z >= 0, y_i >= 0, i = 1,...,m !
123 ! !
124 ! !
125 ! Note that before calling this function, the function !
126 ! values (f0val, fval, dfdx, ...) should be updated !
127 ! using the new x values. !
128 ! ----------------------------------------------------- !
129 class(mma_t), intent(inout) :: this
130 real(kind=rp), dimension(this%n), intent(in) :: x
131 real(kind=rp), dimension(this%n), intent(in) :: df0dx
132 real(kind=rp), dimension(this%m), intent(in) :: fval
133 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
134
135 real(kind=rp) :: rez, rezeta
136 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
137 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
138 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
139
140 real(kind=rp), dimension(4*this%m+2) :: residual_small
141 integer :: ierr
142 real(kind=rp) :: re_sq_norm
143
144 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
145 - this%xsi%x + this%eta%x
146 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
147 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
148
149 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
150 rexsi = this%xsi%x * (x - this%xmin%x)
151 reeta = this%eta%x * (this%xmax%x - x)
152 remu = this%mu%x * this%y%x
153 rezeta = this%zeta * this%z
154 res = this%lambda%x * this%s%x
155
156 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
157 residual_small = [rey, rez, relambda, remu, rezeta, res]
158
159 this%residumax = maxval(abs(residual))
160 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
161
162 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
163 mpi_real_precision, mpi_max, neko_comm, ierr)
164
165 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
166 mpi_real_precision, mpi_sum, neko_comm, ierr)
167
168 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
169 end subroutine mma_dpip_kkt_cpu
170
172 ! point method (dip) subsolve of MMA algorithm.
173 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
174 ! ----------------------------------------------------- !
175 ! Compute the KKT condition right hand side for a given !
176 ! designx x and set the max and norm values of the !
177 ! residue of KKT system to this%residumax and !
178 ! this%residunorm. !
179 ! !
180 ! The left hand sides of the KKT conditions are computed!
181 ! for the following nonlinear programming problem: !
182 ! Minimize f_0(x) + a_0*z + !
183 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
184 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
185 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
186 ! z >= 0, y_i >= 0, i = 1,...,m !
187 ! !
188 ! !
189 ! Note that before calling this function, the function !
190 ! values (f0val, fval, dfdx, ...) should be updated !
191 ! using the new x values. !
192 ! ----------------------------------------------------- !
193 class(mma_t), intent(inout) :: this
194 real(kind=rp), dimension(this%n), intent(in) :: x
195 real(kind=rp), dimension(this%n), intent(in) :: df0dx
196 real(kind=rp), dimension(this%m), intent(in) :: fval
197 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
198
199 real(kind=rp), dimension(this%m) :: relambda, remu
200 real(kind=rp), dimension(2*this%m) :: residual
201
202
203 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
204 ! Compute residual for mu (eta in the paper)
205 remu = this%lambda%x * this%mu%x
206
207 residual = abs([relambda, remu])
208 this%residumax = maxval(residual)
209 this%residunorm = norm2(residual)
210
211 end subroutine mma_dip_kkt_cpu
212
213 !============================================================================!
214 ! private internal subroutines
215
217 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
218 ! ----------------------------------------------------- !
219 ! Generate the approximation sub problem by computing !
220 ! the lower and upper asymtotes and the other necessary !
221 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
222 ! ----------------------------------------------------- !
223 class(mma_t), intent(inout) :: this
224 real(kind=rp), dimension(this%n), intent(in) :: x
225 real(kind=rp), dimension(this%n), intent(in) :: df0dx
226 real(kind=rp), dimension(this%m), intent(in) :: fval
227 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
228 integer, intent(in) :: iter
229 integer :: i, j, ierr
230 real(kind=rp), dimension(this%n) :: x_diff
231 real(kind=rp) :: asy_factor
232
233 x_diff = this%xmax%x - this%xmin%x
234
235 ! ------------------------------------------------------------------------ !
236 ! Setup the current asymptotes
237 associate(low => this%low%x, upp => this%upp%x, &
238 x_1 => this%xold1%x, x_2 => this%xold2%x)
239
240 if (iter .lt. 3) then
241 ! Initialize the lower and upper asymptotes
242 low = x - this%asyinit * x_diff
243 upp = x + this%asyinit * x_diff
244 else
245 do j = 1, this%n
246 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
247 asy_factor = this%asydecr
248 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
249 asy_factor = this%asyincr
250 else
251 asy_factor = 1.0_rp
252 end if
253
254 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
255 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
256 end do
257
258
259 ! Setting a minimum and maximum for the low and upp
260 ! asymptotes (eq3.9)
261 low = max(low, x - 10.0_rp * x_diff)
262 low = min(low, x - 0.01_rp * x_diff)
263
264 upp = min(upp, x + 10.0_rp * x_diff)
265 upp = max(upp, x + 0.01_rp * x_diff)
266 end if
267
268 end associate
269
270
271 ! ------------------------------------------------------------------------ !
272 ! Set the the bounds and coefficients for the approximation
273 ! the move bounds (alpha and beta) are slightly more restrictive
274 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
275 ! also check
276 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
277 ! eq (2.8) and (2.9)
278
279 associate(alpha => this%alpha%x, beta => this%beta%x, &
280 xmin => this%xmin%x, xmax => this%xmax%x, &
281 low => this%low%x, upp => this%upp%x, x => x)
282
283 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
284 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
285 end associate
286
287 ! ------------------------------------------------------------------------ !
288 ! Calculate p0j, q0j, pij, qij
289 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
290
291 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
292 pij => this%pij%x, qij => this%qij%x, &
293 low => this%low%x, upp => this%upp%x)
294
295 p0j = ( &
296 1.001_rp * max(df0dx, 0.0_rp) &
297 + 0.001_rp * max(-df0dx, 0.0_rp) &
298 + 0.00001_rp / max(x_diff, 0.00001_rp) &
299 ) * (upp - x)**2
300
301 q0j = ( &
302 0.001_rp * max(df0dx, 0.0_rp) &
303 + 1.001_rp * max(-df0dx, 0.0_rp) &
304 + 0.00001_rp / max(x_diff, 0.00001_rp)&
305 ) * (x - low)**2
306
307 do j = 1, this%n
308 do i = 1, this%m
309 pij(i, j) = ( &
310 1.001_rp * max(dfdx(i, j), 0.0_rp) &
311 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
312 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
313 ) * (upp(j) - x(j))**2
314
315 qij(i, j) = ( &
316 0.001_rp * max(dfdx(i, j), 0.0_rp) &
317 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
318 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
319 ) * (x(j) - low(j))**2
320 end do
321 end do
322
323 end associate
324
325 ! ------------------------------------------------------------------------ !
326 ! Computing bi as defined in page 5
327
328 associate(bi => this%bi%x, &
329 pij => this%pij%x, qij => this%qij%x, &
330 low => this%low%x, upp => this%upp%x)
331
332 bi = 0.0_rp
333 do i = 1, this%m
334 do j = 1, this%n
335 bi(i) = bi(i) &
336 + pij(i, j) / (upp(j) - x(j)) &
337 + qij(i, j) / (x(j) - low(j))
338 end do
339 end do
340
341 call mpi_allreduce(mpi_in_place, bi, this%m, &
342 mpi_real_precision, mpi_sum, neko_comm, ierr)
343 bi = bi - fval
344
345 end associate
346 end subroutine mma_gensub_cpu
347
350 subroutine mma_subsolve_dpip_cpu(this, designx)
351 ! ------------------------------------------------------- !
352 ! Dual-primal interior point method using Newton's step !
353 ! to solve MMA sub problem. !
354 ! A Backtracking Line Search approach is used to compute !
355 ! the step size; starting with the full Newton's step !
356 ! (delta = 1) and dividing by 2 until we have a step size !
357 ! that leads to a feasible point while ensuring a !
358 ! decrease in the residue. !
359 ! ------------------------------------------------------- !
360
361 class(mma_t), intent(inout) :: this
362 real(kind=rp), dimension(this%n), intent(inout) :: designx
363 ! Note that there is a local dummy "x" in this subroutine, thus, we call
364 ! the current design "designx" instead of just "x"
365 integer :: i, j, k, iter, itto, ierr
366 real(kind=rp) :: epsi, residual_max, residual_norm, &
367 z, zeta, rez, rezeta, &
368 delz, dz, dzeta, &
369 steg, zold, zetaold, new_residual
370 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
371 rey, relambda, remu, res, &
372 dely, dellambda, &
373 dy, dlambda, ds, dmu, &
374 yold, lambdaold, sold, muold
375 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
376 rex, rexsi, reeta, &
377 delx, diagx, dx, dxsi, deta, &
378 xold, xsiold, etaold
379 real(kind=rp), dimension(4*this%m + 2) :: residual_small
380 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
381 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
382
383 real(kind=rp), dimension(this%m, this%n) :: gg
384 real(kind=rp), dimension(this%m+1) :: bb
385 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
386 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
387
388 ! using DGESV in lapack to solve
389 ! the linear system which needs the following parameters
390 integer :: info
391 integer, dimension(this%m+1) :: ipiv
392
393 ! Parameters for global communication
394 real(kind=rp) :: re_sq_norm
395 real(kind=rp) :: minimal_epsilon
396
397 integer :: nglobal
398
399 ! ------------------------------------------------------------------------ !
400 ! initial value for the parameters in the subsolve based on
401 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
402
403 epsi = 1.0_rp !100
404 x = 0.5_rp * (this%alpha%x + this%beta%x)
405 y = 1.0_rp
406 z = 1.0_rp
407 zeta = 1.0_rp
408 lambda = 1.0_rp
409 s = 1.0_rp
410 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
411 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
412 mu = max(1.0_rp, 0.5_rp * this%c%x)
413
414 call mpi_allreduce(this%n, nglobal, 1, &
415 mpi_integer, mpi_sum, neko_comm, ierr)
416
417 ! ------------------------------------------------------------------------ !
418 ! Computing the minimal epsilon and choose the most conservative one
419
420 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
421 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
422 mpi_real_precision, mpi_min, neko_comm, ierr)
423
424 ! ------------------------------------------------------------------------ !
425 ! The main loop of the dual-primal interior point method.
426
427 do while (epsi .gt. minimal_epsilon)
428
429 ! --------------------------------------------------------------------- !
430 ! Calculating residuals based on
431 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
432 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
433
434 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
435 pij => this%pij%x, qij => this%qij%x, &
436 low => this%low%x, upp => this%upp%x, &
437 alpha => this%alpha%x, beta => this%beta%x, &
438 c => this%c%x, d => this%d%x, &
439 a0 => this%a0, a => this%a%x, &
440 bi => this%bi%x)
441
442 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
443 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
444 - xsi + eta
445
446 rey = c + d * y - lambda - mu
447 rez = a0 - zeta - dot_product(lambda, a)
448
449 relambda = 0.0_rp
450 do i = 1, this%m
451 do j = 1, this%n
452 ! Accumulate sums for relambda (the term gi(x))
453 relambda(i) = relambda(i) &
454 + pij(i, j) / (upp(j) - x(j)) &
455 + qij(i, j) / (x(j) - low(j))
456 end do
457 end do
458
459 end associate
460
461 ! --------------------------------------------------------------------- !
462 ! Computing the norm of the residuals
463
464 ! Complete the computations of lambda residuals
465 call mpi_allreduce(mpi_in_place, relambda, this%m, &
466 mpi_real_precision, mpi_sum, neko_comm, ierr)
467 relambda = relambda - this%a%x*z - y + s - this%bi%x
468
469 rexsi = xsi * (x - this%alpha%x) - epsi
470 reeta = eta * (this%beta%x - x) - epsi
471 remu = mu * y - epsi
472 rezeta = zeta * z - epsi
473 res = lambda * s - epsi
474
475 ! Setup vectors of residuals and their norms
476 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
477 residual_small = [rey, rez, relambda, remu, rezeta, res]
478
479 residual_max = maxval(abs(residual))
480 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
481
482 call mpi_allreduce(mpi_in_place, residual_max, 1, &
483 mpi_real_precision, mpi_max, neko_comm, ierr)
484
485 call mpi_allreduce(mpi_in_place, re_sq_norm, &
486 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
487
488 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
489
490 ! --------------------------------------------------------------------- !
491 ! Internal loop
492
493 do iter = 1, this%max_iter
494
495 !Check the condition
496 if (residual_max .lt. epsi) exit
497
498 delx = 0.0_rp
499 do j = 1, this%n
500 do i = 1, this%m
501 delx(j) = delx(j) &
502 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
503 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
504 end do
505 end do
506
507 delx = delx &
508 + this%p0j%x / (this%upp%x - x)**2 &
509 - this%q0j%x / (x - this%low%x)**2 &
510 - epsi / (x - this%alpha%x) &
511 + epsi / (this%beta%x - x)
512
513 dely = this%c%x + this%d%x * y - lambda - epsi / y
514 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
515
516 ! Accumulate sums for dellambda (the term gi(x))
517 dellambda = 0.0_rp
518 do i = 1, this%m
519 do j = 1, this%n
520 dellambda(i) = dellambda(i) &
521 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
522 + this%qij%x(i, j) / (x(j) - this%low%x(j))
523 end do
524 end do
525
526 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
527 mpi_real_precision, mpi_sum, neko_comm, ierr)
528
529 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
530
531 do i = 1, this%m
532 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
533 - this%qij%x(i,:) / (x - this%low%x)**2
534 end do
535
536 diagx = &
537 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
538 / (this%upp%x - x)**3 &
539 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
540 / (x - this%low%x)**3
541
542 diagx = 2.0_rp * diagx &
543 + xsi / (x - this%alpha%x) &
544 + eta / (this%beta%x - x)
545
546
547 !Here we only consider the case m<n in the matlab code
548 !assembling the right hand side matrix based on eq(5.20)
549 ! bb = [dellambda + dely/(this%d%x + &
550 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
551
552 !--------------------------------------------------------------------!
553 ! for MPI computation of bb
554
555 bb = 0.0_rp
556 do i = 1, this%m
557 do j = 1, this%n
558 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
559 end do
560 end do
561
562 call mpi_allreduce(mpi_in_place, bb, this%m, &
563 mpi_real_precision, mpi_sum, neko_comm, ierr)
564
565 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
566 bb(this%m + 1) = delz
567
568 !--------------------------------------------------------------------!
569 ! assembling the coefficients matrix AA based on eq(5.20)
570 ! AA(1:this%m,1:this%m) = &
571 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
572 ! !update diag(AA)
573 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
574 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
575
576 aa = 0.0_rp
577 ! Direct computation of the matrix multiplication
578 ! (for better performance)
579 do i = 1, this%m
580 do j = 1, this%m
581 ! Compute the (i, j) element of AA
582 do k = 1, this%n !this n is global
583 aa(i, j) = aa(i, j) &
584 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
585 end do
586 end do
587 end do
588
589 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
590
591 call mpi_allreduce(mpi_in_place, aa_buffer, &
592 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
593
594 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
595
596 do i = 1, this%m
597 ! update the diag AA
598 aa(i, i) = aa(i, i) &
599 + s(i) / lambda(i) &
600 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
601 end do
602
603 aa(1:this%m, this%m+1) = this%a%x
604 aa(this%m+1, 1:this%m) = this%a%x
605 aa(this%m+1, this%m+1) = - zeta/z
606
607 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
608
609 if (info .ne. 0) then
610 call neko_error("DGESV failed to solve the linear system in " // &
611 "mma_subsolve_dpip.")
612 end if
613
614
615 dlambda = bb(1:this%m)
616 dz = bb(this%m + 1)
617
618 ! based on eq(5.19)
619 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
620 dy = (-dely + dlambda) / (this%d%x + mu / y)
621
622 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
623 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
624 dmu = -mu + (epsi - mu * dy) / y
625 dzeta = -zeta + (epsi - zeta * dz) / z
626 ds = -s + (epsi - dlambda * s) / lambda
627
628 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
629 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
630
631 steg = 1.0_rp / maxval([ &
632 1.0_rp, &
633 -1.01_rp * dxx / xx, &
634 -1.01_rp * dx / (x - this%alpha%x), &
635 1.01_rp * dx / (this%beta%x - x) &
636 ])
637
638 ! Save the old values
639 xold = x
640 yold = y
641 zold = z
642 lambdaold = lambda
643 xsiold = xsi
644 etaold = eta
645 muold = mu
646 zetaold = zeta
647 sold = s
648
649 new_residual = 2.0_rp * residual_norm
650
651 ! Share the new_residual and steg values
652 call mpi_allreduce(mpi_in_place, steg, 1, &
653 mpi_real_precision, mpi_min, neko_comm, ierr)
654 call mpi_allreduce(mpi_in_place, new_residual, 1, &
655 mpi_real_precision, mpi_min, neko_comm, ierr)
656
657 ! The innermost loop to determine the suitable step length
658 ! using the Backtracking Line Search approach
659 itto = 0
660 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
661 itto = itto + 1
662
663 ! update the variables
664 x = xold + steg*dx
665 y = yold + steg*dy
666 z = zold + steg*dz
667
668 lambda = lambdaold + steg*dlambda
669 xsi = xsiold + steg*dxsi
670 eta = etaold + steg*deta
671 mu = muold + steg*dmu
672 zeta = zetaold + steg*dzeta
673 s = sold + steg*ds
674
675 ! Recompute the new_residual to see if this stepsize improves
676 ! the residue
677 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
678 / (this%upp%x - x)**2 &
679 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
680 / (x - this%low%x)**2 &
681 - xsi + eta
682
683 rey = this%c%x + this%d%x*y - lambda - mu
684 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
685
686 ! Accumulate sums for relambda (the term gi(x))
687 relambda = 0.0_rp
688 do i = 1, this%m
689 do j = 1, this%n
690 relambda(i) = relambda(i) &
691 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
692 + this%qij%x(i, j) / (x(j) - this%low%x(j))
693 end do
694 end do
695
696 call mpi_allreduce(mpi_in_place, relambda, this%m, &
697 mpi_real_precision, mpi_sum, neko_comm, ierr)
698
699 relambda = relambda - this%a%x*z - y + s - this%bi%x
700
701 rexsi = xsi * (x - this%alpha%x) - epsi
702 reeta = eta * (this%beta%x - x) - epsi
703 remu = mu * y - epsi
704 rezeta = zeta * z - epsi
705 res = lambda * s - epsi
706
707 ! Compute squared norms for the residuals
708 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
709 call mpi_allreduce(mpi_in_place, re_sq_norm, &
710 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
711
712 residual_small = [rey, rez, relambda, remu, rezeta, res]
713 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
714
715 steg = steg / 2.0_rp
716 end do
717 steg = 2.0_rp * steg ! Correction for the final division by 2
718
719 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
720
721 ! Update the maximum and norm of the residuals
722 residual_norm = new_residual
723 residual_max = maxval(abs(residual))
724 call mpi_allreduce(mpi_in_place, residual_max, 1, &
725 mpi_real_precision, mpi_max, neko_comm, ierr)
726 end do
727
728 epsi = 0.1_rp * epsi
729 end do
730
731 ! Save the new designx
732 this%xold2%x = this%xold1%x
733 this%xold1%x = designx
734 designx = x
735
736 !update the parameters of the MMA object nesessary to compute KKT residual
737 this%y%x = y
738 this%z = z
739 this%lambda%x = lambda
740 this%zeta = zeta
741 this%xsi%x = xsi
742 this%eta%x = eta
743 this%mu%x = mu
744 this%s%x = s
745
746 end subroutine mma_subsolve_dpip_cpu
747
750 subroutine mma_subsolve_dip_cpu(this, designx)
751 ! ------------------------------------------------------------------------ !
752 ! -------------------------------Dual Solver------------------------------ !
753 ! ------------------------------------------------------------------------ !
754 ! This implementation is based on: !
755 ! https://doi.org/10.1007/s00158-012-0869-2 !
756 ! Definition of the Lagrangian function: !
757 ! !
758 ! L(x, y, z, λ) = !
759 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
760 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
761 ! - sum_{i=1}^{m} λ_i * b_i !
762 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ] !
763 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z !
764 ! !
765 ! Breakdown of terms: !
766 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
767 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
768 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
769 ! !
770 ! Optimization problem if λ is given: !
771 ! !
772 ! Minimize L(x, y, z, λ) !
773 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
774 ! !
775 ! Since the problem is separable: !
776 ! Ψ(λ) = !
777 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
778 ! + min_z {L_z(z, λ) | z ≥ 0} !
779 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
780 ! !
781 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
782 ! !
783 ! ------------------------------------------------------------------------ !
784
785 class(mma_t), intent(inout) :: this
786 real(kind=rp), dimension(this%n), intent(inout) :: designx
787 ! Note that there is a local dummy "x" in this subroutine, thus, we call
788 ! the current design "designx" instead of just "x"
789 integer :: i, j, k, iter, ierr
790 real(kind=rp) :: epsi, residual_max, z, steg
791 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
792 relambda, remu, dlambda, dmu, gradlambda
793 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
794
795 ! To compute the Hessian based on eq(13)
796 ! https://doi.org/10.1007/s00158-012-0869-2
797
798 ! inverse of a diag matrix:
799 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
800 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
801 real(kind=rp), dimension(this%m,this%m) :: hess
802 real(kind=rp) :: hesstrace
803
804
805 ! using DGESV in lapack to solve
806 ! the linear system which needs the following parameters
807 integer :: info
808 integer, dimension(this%m+1) :: ipiv
809
810 ! Parameters for global communication
811 real(kind=rp) :: minimal_epsilon
812
813 integer :: nglobal
814
815 ! ------------------------------------------------------------------------ !
816 ! initial value for the parameters in the subsolve based on
817 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
818
819 epsi = 1.0_rp !100
820 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
821 y = 1.0_rp
822 z = 0.0_rp
823 lambda = max(1.0_rp, 0.5_rp * this%c%x)
824 mu = 1.0_rp !this parameter is eta in Niel's paper
825 ! note that mu in the paper translates to epsi in the code following the
826 ! same style as the Cpp code by Neils
827
828 call mpi_allreduce(this%n, nglobal, 1, &
829 mpi_integer, mpi_sum, neko_comm, ierr)
830
831 ! ------------------------------------------------------------------------ !
832 ! Computing the minimal epsilon and choose the most conservative one
833
834 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
835 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
836 mpi_real_precision, mpi_min, neko_comm, ierr)
837
838 ! ------------------------------------------------------------------------ !
839 ! The main loop of the dual-primal interior point method.
840
841 do while (epsi .gt. minimal_epsilon)
842
843 ! --------------------------------------------------------------------- !
844 ! Calculating residuals based on
845 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
846 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
847
848 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
849 pij => this%pij%x, qij => this%qij%x, &
850 low => this%low%x, upp => this%upp%x, &
851 alpha => this%alpha%x, beta => this%beta%x, &
852 c => this%c%x, d => this%d%x, &
853 a0 => this%a0, a => this%a%x, &
854 bi => this%bi%x)
855 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
856 ! the initial value of λ
857
858 ! Comput the value of y that minimizes L_y for the current λ
859 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
860 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
861 do i=1, this%m
862 if (abs(d(i)) < 1.0e-15_rp) then
863 ! to avoid devision by zero in case d=0
864 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
865 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
866 else
867 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
868 end if
869 end do
870
871 ! Comput the value of z that minimizes L_z for the current λ
872 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
873 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
874 ! ensure z>=0
875 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
876
877 ! Comput the value of x that minimizes L_x for the current λ
878 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
879 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
880 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
881 pjlambda = (p0j + matmul(transpose(pij), lambda))
882 qjlambda = (q0j + matmul(transpose(qij), lambda))
883 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
884 (sqrt(pjlambda) + sqrt(qjlambda))
885
886 ! Ensure that x is feasible (alpha<=x<=beta)
887 x = merge(alpha, x, x .lt. alpha)
888 x = merge(beta, x, x .gt. beta)
889
890 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
891 relambda = matmul(pij, 1/(upp - x)) + matmul(qij, 1/(x - low))
892
893 ! Global comminucation for relambda values
894 call mpi_allreduce(mpi_in_place, relambda, this%m, &
895 mpi_real_precision, mpi_sum, neko_comm, ierr)
896 relambda = relambda - bi - y - a * z + mu
897
898 ! Compute residual for mu (eta in the paper)
899 remu = mu * lambda - epsi
900
901 residual_max = maxval(abs([relambda, remu]))
902
903 ! ------------------------------------------------------------------- !
904 ! Internal loop
905 do iter = 1, this%max_iter
906
907 !Check the condition
908 if (residual_max .lt. epsi) exit
909
910 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
911
912 gradlambda = matmul(pij, 1/(upp - x)) + matmul(qij, 1/(x - low))
913
914 ! Global comminucation for gradlambda values
915 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
916 mpi_real_precision, mpi_sum, neko_comm, ierr)
917 gradlambda = gradlambda - bi - y - a * z
918
919 ! Update gradlambda as the right hand side for Newton's method(eq10)
920 gradlambda = - gradlambda - epsi / lambda
921
922 ! Computing the Hessian as in equation (13) in
923 !! https://doi.org/10.1007/s00158-012-0869-2
924
925 !--------------contributions of x terms to Hess--------------------!
926 ljjxinv= - 1.0_rp / ( (2*pjlambda/(upp - x)**3) + &
927 (2.0_rp*qjlambda/(x - low)**3))
928
929 ! Remove the sensitivity for the active primal constraints
930 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < 1.0e-15_rp)
931 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < 1.0e-15_rp)
932
933 do i = 1, this%m
934 hijx(i,:) = pij(i,:) / (upp - x)**2 &
935 - qij(i,:) / (x - low)**2
936 end do
937
938 hess = 0.0_rp
939 ! Direct computation of the matrix multiplication
940 ! (for better performance)
941 do i = 1, this%m
942 do j = 1, this%m
943 ! Compute the (i, j) element of AA
944 do k = 1, this%n !this n is global
945 hess(i, j) = hess(i, j) &
946 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
947 end do
948 end do
949 end do
950
951 call mpi_allreduce(mpi_in_place, hess, &
952 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
953
954 !---------------contributions of z terms to Hess-------------------!
955 ! There is no contibution to the Hess from z terms as z terms are
956 ! linear w.r.t λ
957
958 !---------------contributions of y terms to Hess-------------------!
959 ! Only for inactive constraint, we consider contributions to Hess.
960 ! Note that if d(i) = 0, the y terms (just like z terms) will not
961 ! contribute to the Hessian matrix.
962 do i = 1, this%m
963 if (y(i) .gt. 0.0_rp) then
964 if (abs(d(i)) < 1.0e-15_rp) then
965 ! Hess(i, i) = Hess(i, i) - 1.0_rp/1.0e-8_rp
966 else
967 hess(i, i) = hess(i, i) - 1.0_rp/d(i)
968 end if
969 end if
970 ! Based on eq(10), note the term (-\Omega \Lambda)
971 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
972 end do
973
974 ! Improve the robustness by stablizing the Hess using
975 ! Levenberg-Marquardt algorithm (heuristically)
976 hesstrace = 0.0_rp
977 do i=1, this%m
978 hesstrace = hesstrace + hess(i, i)
979 end do
980 do i=1, this%m
981 hess(i,i) = hess(i, i) - &
982 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
983 end do
984
985 call dgesv(this%m , 1, hess, this%m , ipiv, &
986 gradlambda, this%m, info)
987
988 if (info .ne. 0) then
989 call neko_error("DGESV failed to solve the linear system in " // &
990 "mma_subsolve_dip.")
991 end if
992 dlambda = gradlambda
993
994 ! based on eq(11) for delta eta
995 dmu = -mu + epsi / lambda - dlambda * mu / lambda
996
997 ! Compute the stepsize and update lambda and mu (eta in the paper)
998
999 steg = 1.005_rp
1000 do i = 1, this%m
1001 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1002 steg < -1.01_rp * dlambda(i) / lambda(i))
1003 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1004 steg, steg < -1.01_rp * dmu(i) / mu(i))
1005 end do
1006
1007 steg = 1.0_rp / steg
1008
1009 lambda = lambda + steg*dlambda
1010 mu = mu + steg*dmu
1011
1012 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1013 ! the updated values of λ
1014
1015 ! Comput the value of y that minimizes L_y for the current λ
1016 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
1017 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
1018 do i=1, this%m
1019 if (abs(d(i)) < 1.0e-15_rp) then
1020 ! to avoid devision by zero in case d=0
1021 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
1022 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
1023 else
1024 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
1025 end if
1026 end do
1027
1028 ! Comput the value of z that minimizes L_z for the current λ
1029 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
1030 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
1031 ! ensure z>=0
1032 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
1033
1034 ! Comput the value of x that minimizes L_x for the current λ
1035 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1036 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1037 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1038 pjlambda = (p0j + matmul(transpose(pij), lambda))
1039 qjlambda = (q0j + matmul(transpose(qij), lambda))
1040 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1041 (sqrt(pjlambda) + sqrt(qjlambda))
1042
1043 ! Ensure that x is feasible (alpha<=x<=beta)
1044 x = merge(alpha, x, x .lt. alpha)
1045 x = merge(beta, x, x .gt. beta)
1046
1047 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1048 relambda = matmul(pij, 1/(upp - x)) + matmul(qij, 1/(x - low))
1049 ! Global comminucation for relambda values
1050 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1051 mpi_real_precision, mpi_sum, neko_comm, ierr)
1052 relambda = relambda - bi - y - a * z + mu
1053
1054 ! Compute residual for mu (eta in the paper)
1055 remu = mu * lambda - epsi
1056
1057 residual_max = maxval(abs([relambda, remu]))
1058 end do
1059 end associate
1060
1061 epsi = 0.1_rp * epsi
1062 end do
1063
1064 ! Save the new designx
1065 this%xold2%x = this%xold1%x
1066 this%xold1%x = designx
1067 designx = x
1068
1069 !update the parameters of the MMA object nesessary to compute KKT residual
1070
1071 this%y%x = y
1072 this%z = z
1073 this%lambda%x = lambda
1074 this%mu%x = mu
1075 end subroutine mma_subsolve_dip_cpu
1076
1077end submodule mma_cpu