Neko-TOP
A portable framework for high-order spectral element flow toplogy optimization.
Loading...
Searching...
No Matches
cuda_mma_math.f90
1! Copyright (c) 2025, The Neko-TOP Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33module cuda_mma_math
34 use num_types, only: rp, c_rp
35 use, intrinsic :: iso_c_binding, only: c_int, c_ptr
36
37 implicit none
38 public
39
40 interface
41 subroutine mma_prepare_aa_matrix_cuda(AA, s, lambda, d, mu, y, a, zeta, &
42 z, m) bind(c, name="mma_prepare_aa_matrix_cuda")
43 import c_rp, c_ptr, c_int
44 type(c_ptr), value :: AA, s, lambda, d, mu, y, a
45 real(c_rp) :: zeta, z
46 integer(c_int) :: m
47 end subroutine mma_prepare_aa_matrix_cuda
48
49 subroutine mma_prepare_hessian_cuda(Hess, y, d, mu, lambda, m) &
50 bind(c, name="mma_prepare_hessian_cuda")
51 import c_ptr, c_int
52 type(c_ptr), value :: Hess, y, d, mu, lambda
53 integer(c_int) :: m
54 end subroutine mma_prepare_hessian_cuda
55
56 subroutine cuda_custom_solver(A_d, b_d, n, info) &
57 bind(c, name = 'cuda_custom_solver')
58 import c_int, c_ptr
59 type(c_ptr), value :: A_d, b_d
60 integer(c_int), value :: n
61 integer(c_int) :: info
62 end subroutine cuda_custom_solver
63
64 subroutine cusolver_wrapper(A_d, b_d, n, info) &
65 bind(c, name = 'cuSOLVER_wrapper')
66 import c_int, c_ptr
67 type(c_ptr), value :: A_d, b_d
68 integer(c_int), value :: n
69 integer(c_int) :: info
70 end subroutine cusolver_wrapper
71
72 subroutine delta_1dbeam_cuda(Delta_d, L_total, Le, offset, n) &
73 bind(c, name = 'delta_1dbeam_cuda')
74 import c_rp, c_int, c_ptr
75 type(c_ptr), value :: Delta_d
76 real(c_rp) :: L_total, Le
77 integer(c_int) :: offset, n
78 end subroutine delta_1dbeam_cuda
79
80 subroutine cuda_hess(Hess_d, hijx_d, Ljjxinv_d, n, m) bind(c, name = 'cuda_Hess')
81 import c_int, c_ptr
82 type(c_ptr), value :: Hess_d, hijx_d, Ljjxinv_d
83 integer(c_int) :: n, m
84 end subroutine cuda_hess
85
86 subroutine mma_ljjxinv_cuda(Ljjxinv_d,pjlambda_d, qjlambda_d, x_d, &
87 low_d, upp_d, alpha_d, beta_d, n) bind(c, name = 'mma_Ljjxinv_cuda')
88 import c_int, c_ptr
89 type(c_ptr), value :: Ljjxinv_d, x_d, pjlambda_d, qjlambda_d, low_d, &
90 upp_d, alpha_d, beta_d
91 integer(c_int) :: n
92 end subroutine mma_ljjxinv_cuda
93
94 subroutine mma_dipsolvesub1_cuda(x_d, pjlambda_d, qjlambda_d, low_d, &
95 upp_d, alpha_d, beta_d, n) bind(c, name = 'mma_dipsolvesub1_cuda')
96 import c_int, c_ptr
97 type(c_ptr), value :: x_d, pjlambda_d, qjlambda_d, low_d, &
98 upp_d, alpha_d, beta_d
99 integer(c_int) :: n
100 end subroutine mma_dipsolvesub1_cuda
101
102 subroutine mattrans_v_mul_cuda(output_d, pij_d, lambda_d, m, n) &
103 bind(c, name = 'mattrans_v_mul_cuda')
104 import c_rp, c_int, c_ptr
105 type(c_ptr), value :: output_d, pij_d, lambda_d
106 integer(c_int) :: m, n
107 end subroutine mattrans_v_mul_cuda
108
109 subroutine mma_gensub1_cuda(low_d, upp_d, x_d, xmin_d, xmax_d, asyinit, n)&
110 bind(c, name = 'mma_gensub1_cuda')
111 import c_rp, c_int, c_ptr
112 type(c_ptr), value :: low_d, upp_d, x_d, xmin_d, xmax_d
113 real(c_rp) :: asyinit
114 integer(c_int) :: n
115 end subroutine mma_gensub1_cuda
116
117 subroutine mma_gensub2_cuda(low_d, upp_d, x_d, xold1_d, xold2_d, xdiff_d, &
118 asydecr, asyincr, n) bind(c, name = 'mma_gensub2_cuda')
119 import c_rp, c_int, c_ptr
120 type(c_ptr), value :: low_d, upp_d, x_d, xold1_d, xold2_d, xdiff_d
121 real(c_rp) :: asydecr, asyincr
122 integer(c_int) :: n
123 end subroutine mma_gensub2_cuda
124
125 subroutine mma_gensub3_cuda(x_d, df0dx_d, dfdx_d, low_d, upp_d, min_d, &
126 max_d, alpha_d, beta_d, p0j_d, q0j_d, pij_d, qij_d, n, m) &
127 bind(c, name = 'mma_gensub3_cuda')
128 import c_int, c_ptr
129 type(c_ptr), value :: x_d, df0dx_d, dfdx_d, low_d, upp_d, min_d, max_d, &
130 alpha_d, beta_d, p0j_d, q0j_d, pij_d, qij_d
131 integer(c_int) :: n, m
132 end subroutine mma_gensub3_cuda
133
134 subroutine mma_gensub4_cuda(x_d, low_d, upp_d, pij_d, qij_d, n, m, bi_d) &
135 bind(c, name = 'mma_gensub4_cuda')
136 import c_int, c_ptr
137 type(c_ptr), value :: x_d, low_d, upp_d, pij_d, qij_d, bi_d
138 integer(c_int) :: n, m
139 end subroutine mma_gensub4_cuda
140
141 subroutine cuda_mma_max(xsi_d, x_d, alpha_d, n) &
142 bind(c, name = 'cuda_mma_max')
143 import c_int, c_ptr
144 type(c_ptr), value :: xsi_d, x_d, alpha_d
145 integer(c_int) :: n
146 end subroutine cuda_mma_max
147
148 subroutine cuda_rex(rex_d, x_d, low_d, upp_d, pij_d, p0j_d, qij_d, q0j_d, &
149 lambda_d, xsi_d, eta_d, n, m) bind(c, name = 'cuda_rex')
150 import c_int, c_ptr
151 type(c_ptr), value :: rex_d, x_d, low_d, upp_d, pij_d, p0j_d, qij_d, &
152 q0j_d, lambda_d, xsi_d, eta_d
153 integer(c_int) :: n, m
154 end subroutine cuda_rex
155
156 subroutine cuda_relambda(relambda_d, x_d, upp_d, low_d, pij_d, qij_d, n, &
157 m) bind(c, name = 'cuda_relambda')
158 import c_int, c_ptr
159 type(c_ptr), value :: relambda_d, x_d, upp_d, low_d, pij_d, qij_d
160 integer(c_int) :: n, m
161 end subroutine cuda_relambda
162
163 subroutine cuda_sub2cons2(rexsi_d, xsi_d, x_d, alpha_d, epsi, n) &
164 bind(c, name = 'cuda_sub2cons2')
165 import c_rp, c_int, c_ptr
166 type(c_ptr), value :: rexsi_d, xsi_d, x_d, alpha_d
167 real(c_rp) :: epsi
168 integer(c_int) :: n
169 end subroutine cuda_sub2cons2
170
171 real(c_rp) function cuda_maxval(rex_d, n) bind(c, name = 'cuda_maxval')
172 import c_rp, c_int, c_ptr
173 type(c_ptr), value :: rex_d
174 integer(c_int) :: n
175 end function cuda_maxval
176
177 real(c_rp) function cuda_norm(rex_d, n) bind(c, name = 'cuda_norm')
178 import c_rp, c_int, c_ptr
179 type(c_ptr), value :: rex_d
180 integer(c_int) :: n
181 end function cuda_norm
182
183 subroutine cuda_delx(delx_d, x_d, low_d, upp_d, pij_d, qij_d, p0j_d, &
184 q0j_d, alpha_d, beta_d, lambda_d, epsi, n, m) &
185 bind(c, name = 'cuda_delx')
186 import c_rp, c_int, c_ptr
187 type(c_ptr), value :: delx_d, x_d, low_d, upp_d, pij_d, qij_d, p0j_d, &
188 q0j_d, alpha_d, beta_d, lambda_d
189 real(c_rp) :: epsi
190 integer(c_int) :: n, m
191 end subroutine cuda_delx
192
193
194
195 subroutine cuda_gg(GG_d, x_d, low_d, upp_d, pij_d, qij_d, n, m) &
196 bind(c, name = 'cuda_GG')
197 import c_int, c_ptr
198 type(c_ptr), value :: GG_d, x_d, low_d, upp_d, pij_d, qij_d
199 integer(c_int) :: n, m
200 end subroutine cuda_gg
201
202 subroutine cuda_diagx(diagx_d, x_d, xsi_d, low_d, upp_d, p0j_d, q0j_d, &
203 pij_d, qij_d, alpha_d, beta_d, eta_d, lambda_d, n, m) &
204 bind(c, name = 'cuda_diagx')
205 import c_int, c_ptr
206 type(c_ptr), value :: diagx_d, x_d, xsi_d, low_d, upp_d, p0j_d, q0j_d, &
207 pij_d, qij_d, alpha_d, beta_d, eta_d, lambda_d
208 integer(c_int) :: n, m
209 end subroutine cuda_diagx
210
211 subroutine cuda_bb(bb_d, GG_d, delx_d, diagx_d, n, m) &
212 bind(c, name = 'cuda_bb')
213 import c_int, c_ptr
214 type(c_ptr), value :: bb_d, GG_d, delx_d, diagx_d
215 integer(c_int) :: n, m
216 end subroutine cuda_bb
217
218 subroutine cuda_aa(AA_d, GG_d, diagx_d, n, m) bind(c, name = 'cuda_AA')
219 import c_int, c_ptr
220 type(c_ptr), value :: AA_d, GG_d, diagx_d
221 integer(c_int) :: n, m
222 end subroutine cuda_aa
223
224 subroutine cuda_dx(dx_d, delx_d, diagx_d, GG_d, dlambda_d, n, m) &
225 bind(c, name = 'cuda_dx')
226 import c_int, c_ptr
227 type(c_ptr), value :: dx_d, delx_d, diagx_d, GG_d, dlambda_d
228 integer(c_int) :: n, m
229 end subroutine cuda_dx
230
231 subroutine cuda_dxsi(dxsi_d, xsi_d, dx_d, x_d, alpha_d, epsi, n) &
232 bind(c, name = 'cuda_dxsi')
233 import c_rp, c_int, c_ptr
234 type(c_ptr), value :: dxsi_d, xsi_d, dx_d, x_d, alpha_d
235 real(c_rp) :: epsi
236 integer(c_int) :: n
237 end subroutine cuda_dxsi
238
239 subroutine cuda_deta(deta_d, eta_d, dx_d, x_d, beta_d, epsi, n) &
240 bind(c, name = 'cuda_deta')
241 import c_rp, c_int, c_ptr
242 type(c_ptr), value :: deta_d, eta_d, dx_d, x_d, beta_d
243 real(c_rp) :: epsi
244 integer(c_int) :: n
245 end subroutine cuda_deta
246
247 real(c_rp) function cuda_maxval2(dxx_d, xx_d, cons, n) &
248 bind(c, name = 'cuda_maxval2')
249 import c_rp, c_int, c_ptr
250 type(c_ptr), value :: dxx_d, xx_d
251 real(c_rp) :: cons
252 integer(c_int) :: n
253 end function cuda_maxval2
254
255 real(c_rp) function cuda_maxval3(dx_d, x_d, alpha_d, cons, n) &
256 bind(c, name = 'cuda_maxval3')
257 import c_rp, c_int, c_ptr
258 type(c_ptr), value :: dx_d, x_d, alpha_d
259 real(c_rp) :: cons
260 integer(c_int) :: n
261 end function cuda_maxval3
262
263 subroutine cuda_kkt_rex(rex_d, df0dx_d, dfdx_d, xsi_d, eta_d, lambda_d, &
264 n, m) bind(c, name = 'cuda_kkt_rex')
265 import c_int, c_ptr
266 type(c_ptr), value :: rex_d, df0dx_d, dfdx_d, xsi_d, eta_d, lambda_d
267 integer(c_int) :: n, m
268 end subroutine cuda_kkt_rex
269
270
271 subroutine cuda_maxcons(a_d, b, c, d_d, n) bind(c, name = 'cuda_maxcons')
272 import c_rp, c_int, c_ptr
273 type(c_ptr), value :: a_d, d_d
274 real(c_rp) :: b, c
275 integer(c_int) :: n
276 end subroutine cuda_maxcons
277
278
279 real(c_rp) function cuda_lcsc2(a_d, b_d, n) bind(c, name = 'cuda_lcsc2')
280 import c_rp, c_int, c_ptr
281 type(c_ptr), value :: a_d, b_d
282 integer(c_int) :: n
283 end function cuda_lcsc2
284
285 subroutine cuda_mpisum(a_d, n) bind(c, name = 'cuda_mpisum')
286 import c_int, c_ptr
287 type(c_ptr), value :: a_d
288 integer(c_int) :: n
289 end subroutine cuda_mpisum
290
291 subroutine cuda_add2inv2(a_d, b_d, c, n) bind(c, name = 'cuda_add2inv2')
292 import c_rp, c_int, c_ptr
293 type(c_ptr), value :: a_d, b_d
294 integer(c_int) :: n
295 real(c_rp) :: c
296 end subroutine cuda_add2inv2
297
298 subroutine cuda_max2(a_d, b, c_d, d, n) bind(c, name = 'cuda_max2')
299 import c_rp, c_int, c_ptr
300 type(c_ptr), value :: a_d, c_d
301 integer(c_int) :: n
302 real(c_rp) :: b, d
303 end subroutine cuda_max2
304
305 subroutine cuda_updatebb(bb_d, dellambda_d, dely_d, d_d, mu_d, y_d, delz, &
306 m) bind(c, name = 'cuda_updatebb')
307 import c_rp, c_int, c_ptr
308 type(c_ptr), value :: bb_d, dellambda_d, dely_d, d_d, mu_d, y_d
309 integer(c_int) :: m
310 real(c_rp) :: delz
311 end subroutine cuda_updatebb
312
313 subroutine cuda_updateaa(AA_d, globaltmp_mm_d, s_d, lambda_d, d_d, mu_d, &
314 y_d, a_d, zeta, z, m) bind(c, name = 'cuda_updateAA')
315 import c_rp, c_int, c_ptr
316 type(c_ptr), value :: AA_d, globaltmp_mm_d, s_d, lambda_d, d_d, mu_d, &
317 y_d, a_d
318 integer(c_int) :: m
319 real(c_rp) :: zeta, z
320 end subroutine cuda_updateaa
321
322 subroutine cuda_dy(dy_d, dely_d, dlambda_d, d_d, mu_d, y_d, n) &
323 bind(c, name = 'cuda_dy')
324 import c_int, c_ptr
325 type(c_ptr), value :: dy_d, dely_d, dlambda_d, d_d, mu_d, y_d
326 integer(c_int) :: n
327 end subroutine cuda_dy
328
329 end interface
330
331end module cuda_mma_math