Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
mma_cpu.f90
1! Copyright (c) 2025, The Neko-TOP Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32
33submodule(mma) mma_cpu
34 use lapack_interfaces, only: dgesv
35 use mpi_f08, only: mpi_in_place, mpi_max, mpi_min
36 use comm, only: neko_comm, pe_rank, mpi_real_precision
37 use math, only: neko_eps
38 use profiler, only: profiler_start_region, profiler_end_region
39
40 implicit none
41
42contains
43
44 module subroutine mma_update_cpu(this, iter, x, df0dx, fval, dfdx)
45 ! ----------------------------------------------------- !
46 ! Update the design variable x by solving the convex !
47 ! approximation of the problem. !
48 ! !
49 ! This subroutine is called in each iteration of the !
50 ! optimization loop !
51 ! ----------------------------------------------------- !
52 class(mma_t), intent(inout) :: this
53 integer, intent(in) :: iter
54 real(kind=rp), dimension(this%n), intent(inout) :: x
55 real(kind=rp), dimension(this%n), intent(in) :: df0dx
56 real(kind=rp), dimension(this%m), intent(in) :: fval
57 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
58
59 if (.not. this%is_initialized) then
60 call neko_error("The MMA object is not initialized.")
61 end if
62
63 call profiler_start_region("MMA update")
64
65 ! generate a convex approximation of the problem
66 call profiler_start_region("MMA gensub")
67 call mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
68 call profiler_end_region("MMA gensub")
69
70 !solve the approximation problem using interior point method
71 call profiler_start_region("MMA subsolve")
72 if (this%subsolver .eq. "dip") then
73 call mma_subsolve_dip_cpu(this, x)
74 else
75 call mma_subsolve_dpip_cpu(this, x)
76 end if
77 call profiler_end_region("MMA subsolve")
78
79 call profiler_end_region("MMA update")
80
81 this%is_updated = .true.
82 end subroutine mma_update_cpu
83
85 module subroutine mma_kkt_cpu(this, x, df0dx, fval, dfdx)
86 ! ----------------------------------------------------- !
87 ! Compute the KKT condition right hand side for a given !
88 ! designx x and set the max and norm values of the !
89 ! residue of KKT system to this%residumax and !
90 ! this%residunorm. !
91 ! !
92 ! The left hand sides of the KKT conditions are computed!
93 ! for the following nonlinear programming problem: !
94 ! Minimize f_0(x) + a_0*z + !
95 ! sum(c_i*y_i + 0.5*d_i*(y_i)^2)!
96 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
97 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
98 ! z >= 0, y_i >= 0, i = 1,...,m !
99 ! !
100 ! !
101 ! Note that before calling this function, the function !
102 ! values (f0val, fval, dfdx, ...) should be updated !
103 ! using the new x values. !
104 ! ----------------------------------------------------- !
105 class(mma_t), intent(inout) :: this
106 real(kind=rp), dimension(this%n), intent(in) :: x
107 real(kind=rp), dimension(this%n), intent(in) :: df0dx
108 real(kind=rp), dimension(this%m), intent(in) :: fval
109 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
110
111 call profiler_start_region("MMA KKT computation")
112
113 if (this%subsolver .eq. "dip") then
114 call mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
115 else
116 call mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
117 end if
118
119 call profiler_end_region("MMA KKT computation")
120 end subroutine mma_kkt_cpu
121
123 ! point method (dpip) subsolve of MMA algorithm.
124 module subroutine mma_dpip_kkt_cpu(this, x, df0dx, fval, dfdx)
125 ! ----------------------------------------------------- !
126 ! Compute the KKT condition right hand side for a given !
127 ! designx x and set the max and norm values of the !
128 ! residue of KKT system to this%residumax and !
129 ! this%residunorm. !
130 ! !
131 ! The left hand sides of the KKT conditions are computed!
132 ! for the following nonlinear programming problem: !
133 ! Minimize f_0(x) + a_0*z + !
134 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
135 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
136 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
137 ! z >= 0, y_i >= 0, i = 1,...,m !
138 ! !
139 ! !
140 ! Note that before calling this function, the function !
141 ! values (f0val, fval, dfdx, ...) should be updated !
142 ! using the new x values. !
143 ! ----------------------------------------------------- !
144 class(mma_t), intent(inout) :: this
145 real(kind=rp), dimension(this%n), intent(in) :: x
146 real(kind=rp), dimension(this%n), intent(in) :: df0dx
147 real(kind=rp), dimension(this%m), intent(in) :: fval
148 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
149
150 real(kind=rp) :: rez, rezeta
151 real(kind=rp), dimension(this%m) :: rey, relambda, remu, res
152 real(kind=rp), dimension(this%n) :: rex, rexsi, reeta
153 real(kind=rp), dimension(3*this%n+4*this%m+2) :: residual
154
155 real(kind=rp), dimension(4*this%m+2) :: residual_small
156 integer :: ierr
157 real(kind=rp) :: re_sq_norm
158
159 rex = df0dx + matmul(transpose(dfdx), this%lambda%x) &
160 - this%xsi%x + this%eta%x
161 rey = this%c%x + this%d%x*this%y%x - this%lambda%x - this%mu%x
162 rez = this%a0 - this%zeta - dot_product(this%lambda%x, this%a%x)
163
164 relambda = fval - this%a%x * this%z - this%y%x + this%s%x
165 rexsi = this%xsi%x * (x - this%xmin%x)
166 reeta = this%eta%x * (this%xmax%x - x)
167 remu = this%mu%x * this%y%x
168 rezeta = this%zeta * this%z
169 res = this%lambda%x * this%s%x
170
171 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
172 residual_small = [rey, rez, relambda, remu, rezeta, res]
173
174 this%residumax = maxval(abs(residual))
175 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
176
177 call mpi_allreduce(mpi_in_place, this%residumax, 1, &
178 mpi_real_precision, mpi_max, neko_comm, ierr)
179
180 call mpi_allreduce(mpi_in_place, re_sq_norm, 1, &
181 mpi_real_precision, mpi_sum, neko_comm, ierr)
182
183 this%residunorm = sqrt(norm2(residual_small)**2 + re_sq_norm)
184 end subroutine mma_dpip_kkt_cpu
185
187 ! point method (dip) subsolve of MMA algorithm.
188 module subroutine mma_dip_kkt_cpu(this, x, df0dx, fval, dfdx)
189 ! ----------------------------------------------------- !
190 ! Compute the KKT condition right hand side for a given !
191 ! designx x and set the max and norm values of the !
192 ! residue of KKT system to this%residumax and !
193 ! this%residunorm. !
194 ! !
195 ! The left hand sides of the KKT conditions are computed!
196 ! for the following nonlinear programming problem: !
197 ! Minimize f_0(x) + a_0*z + !
198 ! sum( c_i*y_i + 0.5*d_i*(y_i)^2 )!
199 ! subject to f_i(x) - a_i*z - y_i <= 0, i = 1,...,m !
200 ! xmax_j <= x_j <= xmin_j, j = 1,...,n !
201 ! z >= 0, y_i >= 0, i = 1,...,m !
202 ! !
203 ! !
204 ! Note that before calling this function, the function !
205 ! values (f0val, fval, dfdx, ...) should be updated !
206 ! using the new x values. !
207 ! ----------------------------------------------------- !
208 class(mma_t), intent(inout) :: this
209 real(kind=rp), dimension(this%n), intent(in) :: x
210 real(kind=rp), dimension(this%n), intent(in) :: df0dx
211 real(kind=rp), dimension(this%m), intent(in) :: fval
212 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
213
214 real(kind=rp), dimension(this%m) :: relambda, remu
215 real(kind=rp), dimension(2*this%m) :: residual
216
217
218 relambda = fval - this%a%x * this%z - this%y%x + this%mu%x
219 ! Compute residual for mu (eta in the paper)
220 remu = this%lambda%x * this%mu%x
221
222 residual = abs([relambda, remu])
223 this%residumax = maxval(residual)
224 this%residunorm = norm2(residual)
225
226 end subroutine mma_dip_kkt_cpu
227
228 !============================================================================!
229 ! private internal subroutines
230
232 subroutine mma_gensub_cpu(this, iter, x, df0dx, fval, dfdx)
233 ! ----------------------------------------------------- !
234 ! Generate the approximation sub problem by computing !
235 ! the lower and upper asymtotes and the other necessary !
236 ! parameters (alpha, beta, p0j, q0j, pij, qij, ...). !
237 ! ----------------------------------------------------- !
238 class(mma_t), intent(inout) :: this
239 real(kind=rp), dimension(this%n), intent(in) :: x
240 real(kind=rp), dimension(this%n), intent(in) :: df0dx
241 real(kind=rp), dimension(this%m), intent(in) :: fval
242 real(kind=rp), dimension(this%m, this%n), intent(in) :: dfdx
243 integer, intent(in) :: iter
244 integer :: i, j, ierr
245 real(kind=rp), dimension(this%n) :: x_diff
246 real(kind=rp) :: asy_factor
247
248 x_diff = this%xmax%x - this%xmin%x
249
250 ! ------------------------------------------------------------------------ !
251 ! Setup the current asymptotes
252 associate(low => this%low%x, upp => this%upp%x, &
253 x_1 => this%xold1%x, x_2 => this%xold2%x)
254
255 if (iter .lt. 3) then
256 ! Initialize the lower and upper asymptotes
257 low = x - this%asyinit * x_diff
258 upp = x + this%asyinit * x_diff
259 else
260 do j = 1, this%n
261 if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .lt. 0.0_rp) then
262 asy_factor = this%asydecr
263 else if ((x(j) - x_1(j)) * (x_1(j) - x_2(j)) .gt. 0.0_rp) then
264 asy_factor = this%asyincr
265 else
266 asy_factor = 1.0_rp
267 end if
268
269 low(j) = x(j) - asy_factor * (x_1(j) - low(j))
270 upp(j) = x(j) + asy_factor * (upp(j) - x_1(j))
271 end do
272
273
274 ! Setting a minimum and maximum for the low and upp
275 ! asymptotes (eq3.9)
276 low = max(low, x - 10.0_rp * x_diff)
277 low = min(low, x - 0.01_rp * x_diff)
278
279 upp = min(upp, x + 10.0_rp * x_diff)
280 upp = max(upp, x + 0.01_rp * x_diff)
281 end if
282
283 end associate
284
285
286 ! ------------------------------------------------------------------------ !
287 ! Set the the bounds and coefficients for the approximation
288 ! the move bounds (alpha and beta) are slightly more restrictive
289 ! than low and upp. This is done based on eq(3.6)--eq(3.10).
290 ! also check
291 ! https://comsolyar.com/wp-content/uploads/2020/03/gcmma.pdf
292 ! eq (2.8) and (2.9)
293
294 associate(alpha => this%alpha%x, beta => this%beta%x, &
295 xmin => this%xmin%x, xmax => this%xmax%x, &
296 low => this%low%x, upp => this%upp%x, x => x)
297
298 alpha = max(xmin, low + 0.1_rp*(x - low), x - 0.5_rp*x_diff)
299 beta = min(xmax, upp - 0.1_rp*(upp - x), x + 0.5_rp*x_diff)
300 end associate
301
302 ! ------------------------------------------------------------------------ !
303 ! Calculate p0j, q0j, pij, qij
304 ! where j = 1,2,...,n and i = 1,2,...,m (eq(2.3)-eq(2.5))
305
306 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
307 pij => this%pij%x, qij => this%qij%x, &
308 low => this%low%x, upp => this%upp%x)
309
310 p0j = ( &
311 1.001_rp * max(df0dx, 0.0_rp) &
312 + 0.001_rp * max(-df0dx, 0.0_rp) &
313 + 0.00001_rp / max(x_diff, 0.00001_rp) &
314 ) * (upp - x)**2
315
316 q0j = ( &
317 0.001_rp * max(df0dx, 0.0_rp) &
318 + 1.001_rp * max(-df0dx, 0.0_rp) &
319 + 0.00001_rp / max(x_diff, 0.00001_rp)&
320 ) * (x - low)**2
321
322 do j = 1, this%n
323 do i = 1, this%m
324 pij(i, j) = ( &
325 1.001_rp * max(dfdx(i, j), 0.0_rp) &
326 + 0.001_rp * max(-dfdx(i, j), 0.0_rp) &
327 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
328 ) * (upp(j) - x(j))**2
329
330 qij(i, j) = ( &
331 0.001_rp * max(dfdx(i, j), 0.0_rp) &
332 + 1.001_rp * max(-dfdx(i, j), 0.0_rp) &
333 + 0.00001_rp / max(x_diff(j), 0.00001_rp) &
334 ) * (x(j) - low(j))**2
335 end do
336 end do
337
338 end associate
339
340 ! ------------------------------------------------------------------------ !
341 ! Computing bi as defined in page 5
342
343 associate(bi => this%bi%x, &
344 pij => this%pij%x, qij => this%qij%x, &
345 low => this%low%x, upp => this%upp%x)
346
347 bi = 0.0_rp
348 do i = 1, this%m
349 do j = 1, this%n
350 bi(i) = bi(i) &
351 + pij(i, j) / (upp(j) - x(j)) &
352 + qij(i, j) / (x(j) - low(j))
353 end do
354 end do
355
356 call mpi_allreduce(mpi_in_place, bi, this%m, &
357 mpi_real_precision, mpi_sum, neko_comm, ierr)
358 bi = bi - fval
359
360 end associate
361 end subroutine mma_gensub_cpu
362
365 subroutine mma_subsolve_dpip_cpu(this, designx)
366 ! ------------------------------------------------------- !
367 ! Dual-primal interior point method using Newton's step !
368 ! to solve MMA sub problem. !
369 ! A Backtracking Line Search approach is used to compute !
370 ! the step size; starting with the full Newton's step !
371 ! (delta = 1) and dividing by 2 until we have a step size !
372 ! that leads to a feasible point while ensuring a !
373 ! decrease in the residue. !
374 ! ------------------------------------------------------- !
375
376 class(mma_t), intent(inout) :: this
377 real(kind=rp), dimension(this%n), intent(inout) :: designx
378 ! Note that there is a local dummy "x" in this subroutine, thus, we call
379 ! the current design "designx" instead of just "x"
380 integer :: i, j, k, iter, itto, ierr
381 real(kind=rp) :: epsi, residual_max, residual_norm, &
382 z, zeta, rez, rezeta, &
383 delz, dz, dzeta, &
384 steg, zold, zetaold, new_residual
385 real(kind=rp), dimension(this%m) :: y, lambda, s, mu, &
386 rey, relambda, remu, res, &
387 dely, dellambda, &
388 dy, dlambda, ds, dmu, &
389 yold, lambdaold, sold, muold
390 real(kind=rp), dimension(this%n) :: x, xsi, eta, &
391 rex, rexsi, reeta, &
392 delx, diagx, dx, dxsi, deta, &
393 xold, xsiold, etaold
394 real(kind=rp), dimension(4*this%m + 2) :: residual_small
395 real(kind=rp), dimension(3*this%n + 4*this%m + 2) :: residual
396 real(kind=rp), dimension(2*this%n + 4*this%m + 2) :: xx, dxx
397
398 real(kind=rp), dimension(this%m, this%n) :: gg
399 real(kind=rp), dimension(this%m+1) :: bb
400 real(kind=rp), dimension(this%m+1, this%m+1) :: aa
401 real(kind=rp), dimension(this%m * this%m) :: aa_buffer
402
403 ! using DGESV in lapack to solve
404 ! the linear system which needs the following parameters
405 integer :: info
406 integer, dimension(this%m+1) :: ipiv
407
408 ! Parameters for global communication
409 real(kind=rp) :: re_sq_norm
410 real(kind=rp) :: minimal_epsilon
411
412 integer :: nglobal
413
414 ! ------------------------------------------------------------------------ !
415 ! initial value for the parameters in the subsolve based on
416 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
417
418 epsi = 1.0_rp !100
419 x = 0.5_rp * (this%alpha%x + this%beta%x)
420 y = 1.0_rp
421 z = 1.0_rp
422 zeta = 1.0_rp
423 lambda = 1.0_rp
424 s = 1.0_rp
425 xsi = max(1.0_rp, 1.0_rp / (x - this%alpha%x))
426 eta = max(1.0_rp, 1.0_rp / (this%beta%x - x))
427 mu = max(1.0_rp, 0.5_rp * this%c%x)
428
429 call mpi_allreduce(this%n, nglobal, 1, &
430 mpi_integer, mpi_sum, neko_comm, ierr)
431
432 ! ------------------------------------------------------------------------ !
433 ! Computing the minimal epsilon and choose the most conservative one
434
435 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
436 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
437 mpi_real_precision, mpi_min, neko_comm, ierr)
438
439 ! ------------------------------------------------------------------------ !
440 ! The main loop of the dual-primal interior point method.
441
442 do while (epsi .gt. minimal_epsilon)
443
444 ! --------------------------------------------------------------------- !
445 ! Calculating residuals based on
446 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
447 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
448
449 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
450 pij => this%pij%x, qij => this%qij%x, &
451 low => this%low%x, upp => this%upp%x, &
452 alpha => this%alpha%x, beta => this%beta%x, &
453 c => this%c%x, d => this%d%x, &
454 a0 => this%a0, a => this%a%x)
455
456 rex = (p0j + matmul(transpose(pij), lambda)) / (upp - x)**2 &
457 - (q0j + matmul(transpose(qij), lambda)) / (x - low)**2 &
458 - xsi + eta
459
460 rey = c + d * y - lambda - mu
461 rez = a0 - zeta - dot_product(lambda, a)
462
463 relambda = 0.0_rp
464 do i = 1, this%m
465 do j = 1, this%n
466 ! Accumulate sums for relambda (the term gi(x))
467 relambda(i) = relambda(i) &
468 + pij(i, j) / (upp(j) - x(j)) &
469 + qij(i, j) / (x(j) - low(j))
470 end do
471 end do
472
473 end associate
474
475 ! --------------------------------------------------------------------- !
476 ! Computing the norm of the residuals
477
478 ! Complete the computations of lambda residuals
479 call mpi_allreduce(mpi_in_place, relambda, this%m, &
480 mpi_real_precision, mpi_sum, neko_comm, ierr)
481 relambda = relambda - this%a%x*z - y + s - this%bi%x
482
483 rexsi = xsi * (x - this%alpha%x) - epsi
484 reeta = eta * (this%beta%x - x) - epsi
485 remu = mu * y - epsi
486 rezeta = zeta * z - epsi
487 res = lambda * s - epsi
488
489 ! Setup vectors of residuals and their norms
490 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
491 residual_small = [rey, rez, relambda, remu, rezeta, res]
492
493 residual_max = maxval(abs(residual))
494 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
495
496 call mpi_allreduce(mpi_in_place, residual_max, 1, &
497 mpi_real_precision, mpi_max, neko_comm, ierr)
498
499 call mpi_allreduce(mpi_in_place, re_sq_norm, &
500 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
501
502 residual_norm = sqrt(norm2(residual_small)**2 + re_sq_norm)
503
504 ! --------------------------------------------------------------------- !
505 ! Internal loop
506
507 do iter = 1, this%max_iter
508
509 !Check the condition
510 if (residual_max .lt. epsi) exit
511
512 delx = 0.0_rp
513 do j = 1, this%n
514 do i = 1, this%m
515 delx(j) = delx(j) &
516 + this%pij%x(i,j) * lambda(i) / (this%upp%x(j) - x(j))**2 &
517 - this%qij%x(i,j) * lambda(i) / (x(j) - this%low%x(j))**2
518 end do
519 end do
520
521 delx = delx &
522 + this%p0j%x / (this%upp%x - x)**2 &
523 - this%q0j%x / (x - this%low%x)**2 &
524 - epsi / (x - this%alpha%x) &
525 + epsi / (this%beta%x - x)
526
527 dely = this%c%x + this%d%x * y - lambda - epsi / y
528 delz = this%a0 - dot_product(lambda, this%a%x) - epsi / z
529
530 ! Accumulate sums for dellambda (the term gi(x))
531 dellambda = 0.0_rp
532 do i = 1, this%m
533 do j = 1, this%n
534 dellambda(i) = dellambda(i) &
535 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
536 + this%qij%x(i, j) / (x(j) - this%low%x(j))
537 end do
538 end do
539
540 call mpi_allreduce(mpi_in_place, dellambda, this%m, &
541 mpi_real_precision, mpi_sum, neko_comm, ierr)
542
543 dellambda = dellambda - this%a%x*z - y - this%bi%x + epsi / lambda
544
545 do i = 1, this%m
546 gg(i,:) = this%pij%x(i,:) / (this%upp%x - x)**2 &
547 - this%qij%x(i,:) / (x - this%low%x)**2
548 end do
549
550 diagx = &
551 (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
552 / (this%upp%x - x)**3 &
553 + (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
554 / (x - this%low%x)**3
555
556 diagx = 2.0_rp * diagx &
557 + xsi / (x - this%alpha%x) &
558 + eta / (this%beta%x - x)
559
560
561 !Here we only consider the case m<n in the matlab code
562 !assembling the right hand side matrix based on eq(5.20)
563 ! bb = [dellambda + dely/(this%d%x + &
564 ! (mu/y)) - matmul(GG,delx/diagx), delz ]
565
566 !--------------------------------------------------------------------!
567 ! for MPI computation of bb
568
569 bb = 0.0_rp
570 do i = 1, this%m
571 do j = 1, this%n
572 bb(i) = bb(i) + gg(i, j) * (delx(j) / diagx(j))
573 end do
574 end do
575
576 call mpi_allreduce(mpi_in_place, bb, this%m, &
577 mpi_real_precision, mpi_sum, neko_comm, ierr)
578
579 bb(1:this%m) = dellambda + dely / (this%d%x + mu / y) - bb(1:this%m)
580 bb(this%m + 1) = delz
581
582 !--------------------------------------------------------------------!
583 ! assembling the coefficients matrix AA based on eq(5.20)
584 ! AA(1:this%m,1:this%m) = &
585 ! matmul(matmul(GG,mma_diag(1/diagx)), transpose(GG))
586 ! !update diag(AA)
587 ! AA(1:this%m,1:this%m) = AA(1:this%m,1:this%m) + &
588 ! mma_diag(s/lambda + 1.0/(this%d%x + (mu/y)))
589
590 aa = 0.0_rp
591 ! Direct computation of the matrix multiplication
592 ! (for better performance)
593 do i = 1, this%m
594 do j = 1, this%m
595 ! Compute the (i, j) element of AA
596 do k = 1, this%n !this n is global
597 aa(i, j) = aa(i, j) &
598 + gg(i, k) * (1.0_rp / diagx(k)) * gg(j, k)
599 end do
600 end do
601 end do
602
603 aa_buffer = reshape(aa(1:this%m, 1:this%m), [this%m * this%m])
604
605 call mpi_allreduce(mpi_in_place, aa_buffer, &
606 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
607
608 aa(1:this%m, 1:this%m) = reshape(aa_buffer, [this%m, this%m])
609
610 do i = 1, this%m
611 ! update the diag AA
612 aa(i, i) = aa(i, i) &
613 + s(i) / lambda(i) &
614 + 1.0_rp / (this%d%x(i) + mu(i) / y(i))
615 end do
616
617 aa(1:this%m, this%m+1) = this%a%x
618 aa(this%m+1, 1:this%m) = this%a%x
619 aa(this%m+1, this%m+1) = - zeta/z
620
621 call dgesv(this%m + 1, 1, aa, this%m + 1, ipiv, bb, this%m + 1, info)
622
623 if (info .ne. 0) then
624 call neko_error("DGESV failed to solve the linear system in " // &
625 "mma_subsolve_dpip.")
626 end if
627
628
629 dlambda = bb(1:this%m)
630 dz = bb(this%m + 1)
631
632 ! based on eq(5.19)
633 dx = - delx / diagx - matmul(transpose(gg), dlambda) / diagx
634 dy = (-dely + dlambda) / (this%d%x + mu / y)
635
636 dxsi = -xsi + (epsi - dx * xsi) / (x - this%alpha%x)
637 deta = -eta + (epsi + dx * eta) / (this%beta%x - x)
638 dmu = -mu + (epsi - mu * dy) / y
639 dzeta = -zeta + (epsi - zeta * dz) / z
640 ds = -s + (epsi - dlambda * s) / lambda
641
642 dxx = [dy, dz, dlambda, dxsi, deta, dmu, dzeta, ds]
643 xx = [y, z, lambda, xsi, eta, mu, zeta, s]
644
645 steg = 1.0_rp / maxval([ &
646 1.0_rp, &
647 -1.01_rp * dxx / xx, &
648 -1.01_rp * dx / (x - this%alpha%x), &
649 1.01_rp * dx / (this%beta%x - x) &
650 ])
651
652 ! Save the old values
653 xold = x
654 yold = y
655 zold = z
656 lambdaold = lambda
657 xsiold = xsi
658 etaold = eta
659 muold = mu
660 zetaold = zeta
661 sold = s
662
663 new_residual = 2.0_rp * residual_norm
664
665 ! Share the new_residual and steg values
666 call mpi_allreduce(mpi_in_place, steg, 1, &
667 mpi_real_precision, mpi_min, neko_comm, ierr)
668 call mpi_allreduce(mpi_in_place, new_residual, 1, &
669 mpi_real_precision, mpi_min, neko_comm, ierr)
670
671 ! The innermost loop to determine the suitable step length
672 ! using the Backtracking Line Search approach
673 itto = 0
674 do while ((new_residual .gt. residual_norm) .and. (itto .lt. 50))
675 itto = itto + 1
676
677 ! update the variables
678 x = xold + steg*dx
679 y = yold + steg*dy
680 z = zold + steg*dz
681
682 lambda = lambdaold + steg*dlambda
683 xsi = xsiold + steg*dxsi
684 eta = etaold + steg*deta
685 mu = muold + steg*dmu
686 zeta = zetaold + steg*dzeta
687 s = sold + steg*ds
688
689 ! Recompute the new_residual to see if this stepsize improves
690 ! the residue
691 rex = (this%p0j%x + matmul(transpose(this%pij%x), lambda)) &
692 / (this%upp%x - x)**2 &
693 - (this%q0j%x + matmul(transpose(this%qij%x), lambda)) &
694 / (x - this%low%x)**2 &
695 - xsi + eta
696
697 rey = this%c%x + this%d%x*y - lambda - mu
698 rez = this%a0 - zeta - dot_product(lambda, this%a%x)
699
700 ! Accumulate sums for relambda (the term gi(x))
701 relambda = 0.0_rp
702 do i = 1, this%m
703 do j = 1, this%n
704 relambda(i) = relambda(i) &
705 + this%pij%x(i, j) / (this%upp%x(j) - x(j)) &
706 + this%qij%x(i, j) / (x(j) - this%low%x(j))
707 end do
708 end do
709
710 call mpi_allreduce(mpi_in_place, relambda, this%m, &
711 mpi_real_precision, mpi_sum, neko_comm, ierr)
712
713 relambda = relambda - this%a%x*z - y + s - this%bi%x
714
715 rexsi = xsi * (x - this%alpha%x) - epsi
716 reeta = eta * (this%beta%x - x) - epsi
717 remu = mu * y - epsi
718 rezeta = zeta * z - epsi
719 res = lambda * s - epsi
720
721 ! Compute squared norms for the residuals
722 re_sq_norm = norm2(rex)**2 + norm2(rexsi)**2 + norm2(reeta)**2
723 call mpi_allreduce(mpi_in_place, re_sq_norm, &
724 1, mpi_real_precision, mpi_sum, neko_comm, ierr)
725
726 residual_small = [rey, rez, relambda, remu, rezeta, res]
727 new_residual = sqrt(norm2(residual_small)**2 + re_sq_norm)
728
729 steg = steg / 2.0_rp
730 end do
731 steg = 2.0_rp * steg ! Correction for the final division by 2
732
733 residual = [rex, rey, rez, relambda, rexsi, reeta, remu, rezeta, res]
734
735 ! Update the maximum and norm of the residuals
736 residual_norm = new_residual
737 residual_max = maxval(abs(residual))
738 call mpi_allreduce(mpi_in_place, residual_max, 1, &
739 mpi_real_precision, mpi_max, neko_comm, ierr)
740 end do
741
742 epsi = 0.1_rp * epsi
743 end do
744
745 ! Save the new designx
746 this%xold2%x = this%xold1%x
747 this%xold1%x = designx
748 designx = x
749
750 !update the parameters of the MMA object nesessary to compute KKT residual
751 this%y%x = y
752 this%z = z
753 this%lambda%x = lambda
754 this%zeta = zeta
755 this%xsi%x = xsi
756 this%eta%x = eta
757 this%mu%x = mu
758 this%s%x = s
759
760 end subroutine mma_subsolve_dpip_cpu
761
764 subroutine mma_subsolve_dip_cpu(this, designx)
765 ! ------------------------------------------------------------------------ !
766 ! -------------------------------Dual Solver------------------------------ !
767 ! ------------------------------------------------------------------------ !
768 ! This implementation is based on: !
769 ! https://doi.org/10.1007/s00158-012-0869-2 !
770 ! Definition of the Lagrangian function: !
771 ! !
772 ! L(x, y, z, λ) = !
773 ! sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i * p_{ij}) / (u_j - x_j)!
774 ! + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) / (x_j - l_j) ]!
775 ! - sum_{i=1}^{m} λ_i * b_i !
776 ! + sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ] !
777 ! + (a_0 - sum_{i=1}^{m} λ_i * a_i) * z !
778 ! !
779 ! Breakdown of terms: !
780 ! - Terms related to x: L_x (the first three lines of L(x, y, z, λ)) !
781 ! - Terms related to y: L_y (the fourth line of L(x, y, z, λ)) !
782 ! - Terms related to z: L_z (the last line of L(x, y, z, λ)) !
783 ! !
784 ! Optimization problem if λ is given: !
785 ! !
786 ! Minimize L(x, y, z, λ) !
787 ! subject to: α_j ≤ x_j ≤ β_j, z ≥ 0, and y_i ≥ 0 for all i, j. !
788 ! !
789 ! Since the problem is separable: !
790 ! Ψ(λ) = !
791 ! sum_{j=1}^{n} min_xj {L_x(x_j, λ) | α_j ≤ x_j ≤ β_j} !
792 ! + min_z {L_z(z, λ) | z ≥ 0} !
793 ! + sum_{i=1}^{m} min_yi {L_y(y_i, λ) | y_i ≥ 0} !
794 ! !
795 ! Maximize Ψ(λ) subject to λ_i ≥ 0 for i = 1, ..., m. !
796 ! !
797 ! ------------------------------------------------------------------------ !
798
799 class(mma_t), intent(inout) :: this
800 real(kind=rp), dimension(this%n), intent(inout) :: designx
801 ! Note that there is a local dummy "x" in this subroutine, thus, we call
802 ! the current design "designx" instead of just "x"
803 integer :: i, j, k, iter, ierr
804 real(kind=rp) :: epsi, residual_max, z, steg
805 real(kind=rp), dimension(this%m) :: y, lambda, mu, &
806 relambda, remu, dlambda, dmu, gradlambda
807 real(kind=rp), dimension(this%n) :: x, pjlambda, qjlambda
808
809 ! To compute the Hessian based on eq(13)
810 ! https://doi.org/10.1007/s00158-012-0869-2
811
812 ! inverse of a diag matrix:
813 real(kind=rp), dimension(this%n) :: ljjxinv ! [∇_x^2 Ljj]−1
814 real(kind=rp), dimension(this%m,this%n) :: hijx ! ∇_x hij
815 real(kind=rp), dimension(this%m,this%m) :: hess
816 real(kind=rp) :: hesstrace
817
818
819 ! using DGESV in lapack to solve
820 ! the linear system which needs the following parameters
821 integer :: info
822 integer, dimension(this%m+1) :: ipiv
823
824 ! Parameters for global communication
825 real(kind=rp) :: minimal_epsilon
826
827 integer :: nglobal
828
829 ! ------------------------------------------------------------------------ !
830 ! initial value for the parameters in the subsolve based on
831 ! page 15 of "https://people.kth.se/~krille/mmagcmma.pdf"
832
833 epsi = 1.0_rp !100
834 ! x = 0.5_rp * (this%alpha%x + this%beta%x)
835 y = 1.0_rp
836 z = 0.0_rp
837 lambda = max(1.0_rp, 0.5_rp * this%c%x)
838 mu = 1.0_rp !this parameter is eta in Niel's paper
839 ! note that mu in the paper translates to epsi in the code following the
840 ! same style as the Cpp code by Neils
841
842 call mpi_allreduce(this%n, nglobal, 1, &
843 mpi_integer, mpi_sum, neko_comm, ierr)
844
845 ! ------------------------------------------------------------------------ !
846 ! Computing the minimal epsilon and choose the most conservative one
847
848 minimal_epsilon = max(0.9_rp * this%epsimin, 1.0e-12_rp)
849 call mpi_allreduce(mpi_in_place, minimal_epsilon, 1, &
850 mpi_real_precision, mpi_min, neko_comm, ierr)
851
852 ! ------------------------------------------------------------------------ !
853 ! The main loop of the dual-primal interior point method.
854
855 do while (epsi .gt. minimal_epsilon)
856
857 ! --------------------------------------------------------------------- !
858 ! Calculating residuals based on
859 ! "https://people.kth.se/~krille/mmagcmma.pdf" for the variables
860 ! x, y, z, lambda residuals based on eq(5.9a)-(5.9d), respectively.
861
862 associate(p0j => this%p0j%x, q0j => this%q0j%x, &
863 pij => this%pij%x, qij => this%qij%x, &
864 low => this%low%x, upp => this%upp%x, &
865 alpha => this%alpha%x, beta => this%beta%x, &
866 c => this%c%x, d => this%d%x, &
867 a0 => this%a0, a => this%a%x, &
868 bi => this%bi%x)
869 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
870 ! the initial value of λ
871
872 ! Comput the value of y that minimizes L_y for the current λ
873 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
874 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
875 do i=1, this%m
876 if (abs(d(i)) < neko_eps) then
877 ! to avoid devision by zero in case d=0
878 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
879 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
880 else
881 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
882 end if
883 end do
884
885 ! Comput the value of z that minimizes L_z for the current λ
886 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
887 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
888 ! ensure z>=0
889 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
890
891 ! Comput the value of x that minimizes L_x for the current λ
892 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
893 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
894 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
895 pjlambda = (p0j + matmul(transpose(pij), lambda))
896 qjlambda = (q0j + matmul(transpose(qij), lambda))
897 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
898 (sqrt(pjlambda) + sqrt(qjlambda))
899
900 ! Ensure that x is feasible (alpha<=x<=beta)
901 x = merge(alpha, x, x .lt. alpha)
902 x = merge(beta, x, x .gt. beta)
903
904 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
905 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
906 matmul(qij, 1.0_rp / (x - low))
907
908 ! Global comminucation for relambda values
909 call mpi_allreduce(mpi_in_place, relambda, this%m, &
910 mpi_real_precision, mpi_sum, neko_comm, ierr)
911 relambda = relambda - bi - y - a * z + mu
912
913 ! Compute residual for mu (eta in the paper)
914 remu = mu * lambda - epsi
915
916 residual_max = maxval(abs([relambda, remu]))
917
918 ! ------------------------------------------------------------------- !
919 ! Internal loop
920 do iter = 1, this%max_iter
921
922 !Check the condition
923 if (residual_max .lt. epsi) exit
924
925 ! Compute dL(x, y, z, λ)/dλ for the updated x(λ), y(λ), z(λ)
926
927 gradlambda = matmul(pij, 1.0_rp / (upp - x)) + &
928 matmul(qij, 1.0_rp/(x - low))
929
930 ! Global comminucation for gradlambda values
931 call mpi_allreduce(mpi_in_place, gradlambda, this%m, &
932 mpi_real_precision, mpi_sum, neko_comm, ierr)
933 gradlambda = gradlambda - bi - y - a * z
934
935 ! Update gradlambda as the right hand side for Newton's method(eq10)
936 gradlambda = - gradlambda - epsi / lambda
937
938 ! Computing the Hessian as in equation (13) in
939 !! https://doi.org/10.1007/s00158-012-0869-2
940
941 !--------------contributions of x terms to Hess--------------------!
942 ljjxinv= - 1.0_rp / ( (2.0_rp * pjlambda/(upp - x)**3) + &
943 (2.0_rp * qjlambda/(x - low)**3))
944
945
946 ! Remove the sensitivity for the active primal constraints
947 ljjxinv = merge(0.0_rp, ljjxinv, x - alpha < neko_eps)
948 ljjxinv = merge(0.0_rp, ljjxinv, beta - x < neko_eps)
949
950 do i = 1, this%m
951 hijx(i,:) = pij(i,:) / (upp - x)**2 &
952 - qij(i,:) / (x - low)**2
953 end do
954
955 hess = 0.0_rp
956 ! Direct computation of the matrix multiplication
957 ! (for better performance)
958 do i = 1, this%m
959 do j = 1, this%m
960 ! Compute the (i, j) element of AA
961 do k = 1, this%n !this n is global
962 hess(i, j) = hess(i, j) &
963 + hijx(i, k) * (ljjxinv(k)) * hijx(j, k)
964 end do
965 end do
966 end do
967
968 call mpi_allreduce(mpi_in_place, hess, &
969 this%m*this%m, mpi_real_precision, mpi_sum, neko_comm, ierr)
970
971 !---------------contributions of z terms to Hess-------------------!
972 ! There is no contibution to the Hess from z terms as z terms are
973 ! linear w.r.t λ
974
975 !---------------contributions of y terms to Hess-------------------!
976 ! Only for inactive constraint, we consider contributions to Hess.
977 ! Note that if d(i) = 0, the y terms (just like z terms) will not
978 ! contribute to the Hessian matrix.
979 do i = 1, this%m
980 if (y(i) .gt. 0.0_rp) then
981 if (abs(d(i)) < neko_eps) then
982 ! Hess(i, i) = Hess(i, i) - 1.0_rp/1.0e-8_rp
983 else
984 hess(i, i) = hess(i, i) - 1.0_rp/d(i)
985 end if
986 end if
987 ! Based on eq(10), note the term (-\Omega \Lambda)
988 hess(i, i) = hess(i, i) - mu(i) / lambda(i)
989 end do
990
991 ! Improve the robustness by stablizing the Hess using
992 ! Levenberg-Marquardt algorithm (heuristically)
993 hesstrace = 0.0_rp
994 do i=1, this%m
995 hesstrace = hesstrace + hess(i, i)
996 end do
997 do i=1, this%m
998 hess(i,i) = hess(i, i) - &
999 max(-1.0e-4_rp*hesstrace/this%m, 1.0e-7_rp)
1000 end do
1001
1002 call dgesv(this%m , 1, hess, this%m , ipiv, &
1003 gradlambda, this%m, info)
1004
1005 if (info .ne. 0) then
1006 call neko_error("DGESV failed to solve the linear system in " // &
1007 "mma_subsolve_dip.")
1008 end if
1009 dlambda = gradlambda
1010
1011 ! based on eq(11) for delta eta
1012 dmu = -mu + epsi / lambda - dlambda * mu / lambda
1013
1014 ! Compute the stepsize and update lambda and mu (eta in the paper)
1015
1016 steg = 1.005_rp
1017 do i = 1, this%m
1018 steg = merge(-1.01_rp * dlambda(i) / lambda(i), steg, &
1019 steg < -1.01_rp * dlambda(i) / lambda(i))
1020 steg = merge(-1.01_rp * dmu(i) / mu(i), &
1021 steg, steg < -1.01_rp * dmu(i) / mu(i))
1022 end do
1023
1024 steg = 1.0_rp / steg
1025
1026 lambda = lambda + steg*dlambda
1027 mu = mu + steg*dmu
1028
1029 ! minimize(L_x, L_y, L_z) and compute x(λ), y(λ), z(λ) for
1030 ! the updated values of λ
1031
1032 ! Comput the value of y that minimizes L_y for the current λ
1033 ! minimize (sum_{i=1}^{m} [ (c_i - λ_i) * y_i + 0.5 * d_i * y_i^2 ])
1034 ! dL_y/dy =0 => y= (λ_i - c_i)/d_i, ensure y>=0
1035 do i=1, this%m
1036 if (abs(d(i)) < neko_eps) then
1037 ! to avoid devision by zero in case d=0
1038 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (1.0e-8_rp))
1039 ! y(i) = merge(0.0_rp, 1.0_rp, (lambda(i) - c(i)) >= 0.0_rp)
1040 else
1041 y(i) = max(0.0_rp, (lambda(i) - c(i)) / (d(i)))
1042 end if
1043 end do
1044
1045 ! Comput the value of z that minimizes L_z for the current λ
1046 ! minimize ((a_0 - sum_{i=1}^{m} λ_i * a_i) * z)
1047 ! if (a_0-dot_product(lambda, a)>=0) z=0 else z= 1.0
1048 ! ensure z>=0
1049 z = merge(0.0_rp, 1.0_rp, a0 - dot_product(lambda, a) >= 0.0_rp)
1050
1051 ! Comput the value of x that minimizes L_x for the current λ
1052 ! minimize( sum_{j=1}^{n} [ (p_{0j} + sum_{i=1}^{m} λ_i *
1053 ! p_{ij}) / (u_j - x_j) + (q_{0j} + sum_{i=1}^{m} λ_i * q_{ij}) /
1054 ! (x_j - l_j) ] - sum_{i=1}^{m} λ_i * b_i)
1055 pjlambda = (p0j + matmul(transpose(pij), lambda))
1056 qjlambda = (q0j + matmul(transpose(qij), lambda))
1057 x = (sqrt(pjlambda) * low + sqrt(qjlambda) * upp) / &
1058 (sqrt(pjlambda) + sqrt(qjlambda))
1059
1060 ! Ensure that x is feasible (alpha<=x<=beta)
1061 x = merge(alpha, x, x .lt. alpha)
1062 x = merge(beta, x, x .gt. beta)
1063
1064 ! Compute the residual for the lambda and mu using eq(9) and eq(15)
1065 relambda = matmul(pij, 1.0_rp / (upp - x)) + &
1066 matmul(qij, 1.0_rp / (x - low))
1067 ! Global comminucation for relambda values
1068 call mpi_allreduce(mpi_in_place, relambda, this%m, &
1069 mpi_real_precision, mpi_sum, neko_comm, ierr)
1070 relambda = relambda - bi - y - a * z + mu
1071
1072 ! Compute residual for mu (eta in the paper)
1073 remu = mu * lambda - epsi
1074
1075 residual_max = maxval(abs([relambda, remu]))
1076 end do
1077 end associate
1078
1079 epsi = 0.1_rp * epsi
1080 end do
1081
1082 ! Save the new designx
1083 this%xold2%x = this%xold1%x
1084 this%xold1%x = designx
1085 designx = x
1086
1087 !update the parameters of the MMA object nesessary to compute KKT residual
1088
1089 this%y%x = y
1090 this%z = z
1091 this%lambda%x = lambda
1092 this%mu%x = mu
1093 end subroutine mma_subsolve_dip_cpu
1094
1095end submodule mma_cpu