Neko-TOP
A portable framework for high-order spectral element flow topology optimization.
Loading...
Searching...
No Matches
mma.cu
Go to the documentation of this file.
1#include "device/cuda/check.h"
2#include "mma_kernel.h"
3#include <stdio.h>
4#include <stdlib.h>
5
6
7extern "C" {
8#include "math/bcknd/device/device_mpi_reduce.h"
9#include "math/bcknd/device/device_mpi_op.h"
10#include "device/device_config.h"
11
12
// Cached block count that the shared reduction scratch buffers were last
// sized for; the reduction wrappers below grow the buffers lazily when a
// call needs more blocks than any previous call.
// NOTE(review): the companion declarations `real* mma_bufred` and
// `real* mma_bufred_d` (source lines 14-15 per the index below) were
// dropped by the page extraction — they exist in the real mma.cu.
 13 int mma_red_s = 0;
// Host wrapper building the b_i right-hand-side terms of the MMA subproblem:
// one elementwise pass fills `temp` ((*m) x (*n) scratch), then for each
// constraint row i a column-sum kernel plus a single-block reduce produce a
// scalar that is copied into bi[i].
// NOTE(review): this Doxygen listing dropped several source lines (the
// buffer free/realloc calls at 27-33, the mma_sub4_kernel launch line 40,
// the mmasum_kernel launch line 45, and the CUDA_CHECK/cudaMemcpyAsync
// lines 47-57) — restore from the repository before editing this body.
 17 void mma_gensub4_cuda(void* x, void* low, void* upp, void* pij, void* qij,
 18 int* n, int* m, void* bi) {
 19 const dim3 nthrds(1024, 1, 1);
 20 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 21 const int nb = ((*n) + 1024 - 1) / 1024;
 22 const cudaStream_t stream = (cudaStream_t)glb_cmd_queue;
 23
 // Grow the shared reduction buffers if this call needs more blocks than
 // any previous reduction did.
 24 if (nb > mma_red_s) {
 25 mma_red_s = nb;
 26 if (mma_bufred != NULL) {
 29 }
 31 nb * sizeof(real));
 33 nb * sizeof(real));
 34 }
 35
 36 real* temp;
 37 real* bi_d = (real*)bi;
 // NOTE(review): per-call cudaMalloc/cudaFree of `temp` is synchronous and
 // slow; a persistent scratch buffer would avoid it — confirm with profiling.
 38 cudaMalloc(&temp, (*m) * (*n) * sizeof(real));
 39
 41 (real*)x, (real*)low, (real*)upp, (real*)pij, (real*)qij,
 42 temp, *n, *m);
 43
 // One column-sum + reduce + device-to-device copy per constraint row.
 44 for (int i = 0; i < (*m); i++) {
 46 temp, mma_bufred_d, (*n), (*m), i);
 48
 49 mmareduce_kernel<real><<<1, 1024, 0, stream>>>(
 52
 54 bi_d + i, mma_bufred_d, sizeof(real),
 56
 58 }
 59
 60 cudaFree(temp);
 61 }
62
// Host wrapper: launches mma_sub3_kernel over n elements (1024 threads per
// block, ceil(n/1024) blocks) on the glb_cmd_queue stream to build the
// p/q approximation coefficients of the MMA subproblem.
// NOTE(review): source lines 70 (the kernel-launch opening line) and 77
// (likely a CUDA_CHECK error check) are missing from this extracted
// listing — verify against the repository copy of mma.cu.
 64 void mma_gensub3_cuda(void* x, void* df0dx, void* dfdx, void* low,
 65 void* upp, void* xmin, void* xmax, void* alpha, void* beta,
 66 void* p0j, void* q0j, void* pij, void* qij, int* n, int* m) {
 67 const dim3 nthrds(1024, 1, 1);
 68 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 69
 71 (cudaStream_t)glb_cmd_queue>>>(
 72 (real*)x, (real*)df0dx, (real*)dfdx, (real*)low,
 73 (real*)upp, (real*)xmin, (real*)xmax, (real*)alpha,
 74 (real*)beta, (real*)p0j, (real*)q0j, (real*)pij,
 75 (real*)qij, *n, *m);
 76
 78 }
79
// Host wrapper: launches mma_sub2_kernel (1024 threads/block, ceil(n/1024)
// blocks) to update the low/upp asymptotes from the current and two
// previous iterates, using scalar asymptote decrease/increase factors.
// NOTE(review): source lines 86 (kernel-launch opening line) and 92
// (likely a CUDA_CHECK error check) were dropped by the extraction.
 80 void mma_gensub2_cuda(void* low, void* upp, void* x, void* xold1,
 81 void* xold2, void* xmin, void* xmax, real* asydecr,
 82 real* asyincr, int* n) {
 83 const dim3 nthrds(1024, 1, 1);
 84 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 85
 87 (cudaStream_t)glb_cmd_queue>>>(
 88 (real*)low, (real*)upp, (real*)x, (real*)xold1,
 89 (real*)xold2, (real*)xmin, (real*)xmax, *asydecr,
 90 *asyincr, *n);
 91
 93 }
94
95
96
// Host wrapper: launches mma_sub1_kernel (1024 threads/block, ceil(n/1024)
// blocks) to initialize the low/upp asymptotes from x and the bounds,
// scaled by the scalar *asyinit.
// NOTE(review): source line 104 is missing from this extracted listing
// (likely a CUDA_CHECK error check) — verify against the repository.
 97 void mma_gensub1_cuda(void* low, void* upp, void* x, void* xmin, void* xmax,
 98 real* asyinit, int* n) {
 99 const dim3 nthrds(1024, 1, 1);
 100 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 101 mma_sub1_kernel<real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue>>>
 102 ((real*)low, (real*)upp, (real*)x, (real*)xmin, (real*)xmax,
 103 *asyinit, *n);
 105 }
106
// Host wrapper: launches mma_max2_kernel (1024 threads/block, ceil(n/1024)
// blocks) on the glb_cmd_queue stream, operating elementwise on xsi/x/alpha.
// NOTE(review): source line 113 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 107 void cuda_mma_max(void* xsi, void* x, void* alpha, int* n) {
 108 const dim3 nthrds(1024, 1, 1);
 109 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 110
 111 mma_max2_kernel<real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue>>>
 112 ((real*)xsi, (real*)x, (real*)alpha, *n);
 114 }
115
// Host wrapper computing the relambda residual terms: an elementwise pass
// fills the (*n) x (*m) scratch `temp`, then for each constraint row i a
// column-sum kernel plus a single-block reduce collapse it to a scalar.
// NOTE(review): this listing dropped the buffer free/realloc lines
// (126-130), the mmasum_kernel launch line 137, and lines 139/142-145
// (likely CUDA_CHECK and the copy of the reduced scalar into relambda[i]).
// Restore from the repository before editing.
 116 void cuda_relambda(void* relambda, void* x, void* xupp, void* xlow,
 117 void* pij, void* qij, int* n, int* m) {
 118 const dim3 nthrds(1024, 1, 1);
 119 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 120 const int nb = ((*n) + 1024 - 1)/ 1024;
 121 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 122
 // Grow the shared reduction buffers when needed (see mma_red_s).
 123 if ( nb > mma_red_s){
 124 mma_red_s = nb;
 125 if (mma_bufred != NULL) {
 128 }
 131 }
 132 real* temp;
 133 cudaMalloc(&temp, (*n) * (*m) * sizeof(real));
 134 relambda_kernel<real> <<<nblcks, nthrds, 0, stream >>> (temp, (real*)x,
 135 (real*)xupp, (real*)xlow, (real*)pij, (real*)qij, *n, *m);
 136 for (int i = 0; i < (*m); i++) {
 138 (temp, mma_bufred_d, (*n),(*m), i);
 140 mmareduce_kernel<real> <<<1, 1024, 0, stream >>>
 141 (mma_bufred_d, nb);
 146 }
 147 cudaFree(temp);
 148 }
149
// Host wrapper: launches sub2cons2_kernel (1024 threads/block,
// ceil(n/1024) blocks) on the glb_cmd_queue stream with four device
// arrays and the scalar *e.
// NOTE(review): source line 157 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 150 void cuda_sub2cons2(void* a, void* b, void* c, void* d, real* e, int* n) {
 151
 152 const dim3 nthrds(1024, 1, 1);
 153 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 154
 155 sub2cons2_kernel<real><<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue>>>(
 156 (real*)a, (real*)b, (real*)c, (real*)d, *e, *n);
 158 }
159
// Host wrapper: two-stage max reduction of device array `a` (per-block
// maxima, then one 1024-thread block), returning the result read from the
// host-side staging buffer mma_bufred[0].
// NOTE(review): this listing dropped the buffer free/realloc lines
// (171-175), the first maxval_kernel launch line 178, and lines 180-188
// (likely CUDA_CHECK calls, the reduce arguments, and the device-to-host
// copy/synchronize that makes reading mma_bufred[0] valid). Restore from
// the repository before editing.
 161 real cuda_maxval(void* a, int* n) {
 162
 163 const dim3 nthrds(1024, 1, 1);
 164 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 165 const int nb = ((*n) + 1024 - 1) / 1024;
 166 const cudaStream_t stream = (cudaStream_t)glb_cmd_queue;
 167
 168 if (nb > mma_red_s) {
 169 mma_red_s = nb;
 170 if (mma_bufred != NULL) {
 173 }
 176 }
 177
 179 (real*)a, mma_bufred_d, (*n));
 181
 182 max_reduce_kernel<real><<<1, 1024, 0, stream>>>(
 185
 189
 190 return mma_bufred[0];
 191 }
192
193
// Host wrapper: launches delx_kernel (1024 threads/block, ceil(n/1024)
// blocks) to compute the delx Newton-step terms from the current iterate,
// asymptotes, coefficients, bounds, lambda, and barrier parameter *epsi.
// NOTE(review): source line 205 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 194 void cuda_delx(void* delx, void* x, void* xlow, void* xupp, void* pij,
 195 void* qij, void* p0j, void* q0j, void* alpha, void* beta, void* lambda,
 196 real* epsi, int* n, int* m) {
 197
 198 const dim3 nthrds(1024, 1, 1);
 199 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 200
 201 delx_kernel<real><<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue>>>(
 202 (real*)delx, (real*)x, (real*)xlow, (real*)xupp, (real*)pij,
 203 (real*)qij, (real*)p0j, (real*)q0j, (real*)alpha, (real*)beta,
 204 (real*)lambda, *epsi, *n, *m);
 206 }
207
208
// Host wrapper: launches GG_kernel (1024 threads/block, ceil(n/1024)
// blocks) on the glb_cmd_queue stream to fill the GG matrix terms.
// NOTE(review): source line 216 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 209 void cuda_GG(void* GG, void* x, void* xlow, void* xupp,
 210 void* pij, void* qij, int* n, int* m) {
 211 const dim3 nthrds(1024, 1, 1);
 212 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 213 GG_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 214 ((real*)GG, (real*)x, (real*)xlow,(real*) xupp,
 215 (real*)pij, (real*) qij, *n,*m);
 217 }
218
219
// Host wrapper: launches diagx_kernel (1024 threads/block, ceil(n/1024)
// blocks) to assemble the diagonal scaling vector diagx from the iterate,
// multipliers, asymptotes, coefficients, and bounds.
// NOTE(review): source line 229 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 220 void cuda_diagx(void* diagx, void* x, void* xsi,void* xlow, void* xupp,
 221 void* p0j, void* q0j, void* pij, void* qij, void* alpha, void* beta,
 222 void* eta, void* lambda, int *n, int *m) {
 223 const dim3 nthrds(1024, 1, 1);
 224 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 225 diagx_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 226 ((real*)diagx, (real*)x, (real*)xsi,(real*)xlow,
 227 (real*) xupp,(real*)p0j, (real*) q0j, (real*)pij, (real*) qij,
 228 (real*)alpha, (real*) beta, (real*)eta, (real*) lambda, *n,*m);
 230 }
231
232
// Host wrapper: for each of the *m constraint rows, launches a per-block
// partial-sum kernel over GG/delx/diagx followed by a single-block reduce.
// NOTE(review): this listing dropped the buffer free/realloc lines
// (242-246), the first-stage kernel launch line 249, and lines 251/253-256
// (likely CUDA_CHECK calls and the copy of the reduced scalar into bb[i]).
// Restore from the repository before editing.
 233 void cuda_bb(void* bb, void* GG, void* delx,void* diagx, int *n, int *m) {
 234 const dim3 nthrds(1024, 1, 1);
 235 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 236 const int nb = ((*n) + 1024 - 1)/ 1024;
 237 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 // Grow the shared reduction buffers when needed (see mma_red_s).
 239 if(nb > mma_red_s){
 240 mma_red_s = nb;
 241 if(mma_bufred != NULL){
 244 }
 247 }
 248 for (int i = 0; i < (*m); i++) {
 250 ((real*)GG,(real*)delx,(real*)diagx, mma_bufred_d, (*n),(*m), i);
 252 mmareduce_kernel<real> <<<1, 1024, 0, stream>>> (mma_bufred_d, nb);
 257 }
 258 }
259
260
// Host wrapper: assembles the (*m+1) x (*m+1) system matrix AA entry by
// entry — for each (i, j) pair, a per-block partial-sum over GG/diagx is
// reduced by a single 1024-thread block and the scalar is stored at the
// column-major index i + j*(*m+1).
// NOTE(review): this listing dropped the buffer free/realloc lines
// (270-274), the first-stage kernel launch line 278, and lines 280/282-286
// (likely CUDA_CHECK calls and the copy placing the scalar into AA).
// Restore from the repository before editing.
 261 void cuda_AA(void* AA, void* GG, void* diagx, int *n, int *m) {
 262 const dim3 nthrds(1024, 1, 1);
 263 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 264 const int nb = ((*n) + 1024 - 1)/ 1024;
 265 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 267 if(nb > mma_red_s){
 268 mma_red_s = nb;
 269 if(mma_bufred != NULL){
 272 }
 275 }
 276 for (int i = 0; i < (*m); i++){
 277 for (int j=0; j<(*m);j++){
 279 ((real*)GG,(real*)diagx, mma_bufred_d, (*n),(*m), i, j);
 281 mmareduce_kernel<real> <<<1, 1024, 0, stream>>> (mma_bufred_d, nb);
 284 i+j*(*m+1));
 287 }
 288 }
 289 }
290
291
// Host wrapper: launches dx_kernel (1024 threads/block, ceil(n/1024)
// blocks) to back-substitute the design-variable step dx from delx,
// diagx, GG, and dlambda.
// NOTE(review): source line 299 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 292 void cuda_dx(void* dx,void* delx, void* diagx, void* GG, void* dlambda,
 293 int* n, int* m) {
 294 const dim3 nthrds(1024, 1, 1);
 295 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 296 dx_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 297 ((real*)dx, (real*)delx, (real*)diagx,(real*) GG, (real*)dlambda,
 298 *n,*m);
 300 }
301
302
// Host wrapper: launches dxsi_kernel (1024 threads/block, ceil(n/1024)
// blocks) to compute the xsi-multiplier step from dx, x, alpha and the
// barrier parameter *epsi.
// NOTE(review): source line 310 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 303 void cuda_dxsi(void* dxsi, void* xsi, void* dx,void* x,
 304 void* alpha, real*epsi, int* n) {
 305 const dim3 nthrds(1024, 1, 1);
 306 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 307 dxsi_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 308 ((real*)dxsi, (real*)xsi, (real*)dx,(real*) x,
 309 (real*)alpha, *epsi,*n);
 311 }
312
313
// Host wrapper: launches deta_kernel (1024 threads/block, ceil(n/1024)
// blocks) to compute the eta-multiplier step from dx, x, beta and *epsi;
// mirrors cuda_dxsi for the upper-bound side.
// NOTE(review): source line 321 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 314 void cuda_deta(void* deta, void* eta, void* dx, void* x,
 315 void* beta, real* epsi, int* n) {
 316 const dim3 nthrds(1024, 1, 1);
 317 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 318 deta_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 319 ((real*)deta, (real*)eta, (real*)dx, (real*)x,
 320 (real*)beta, *epsi, *n);
 322 }
323
324
// Host wrapper: launches the rex residual kernel (1024 threads/block,
// ceil(n/1024) blocks) over the full set of iterate/coefficient/multiplier
// arrays on the glb_cmd_queue stream.
// NOTE(review): source lines 330 (kernel-launch opening line, kernel name
// dropped) and 334 (likely a CUDA_CHECK error check) are missing from this
// extracted listing.
 325 void cuda_rex(void* rex, void* x, void* xlow, void* xupp, void* pij,
 326 void* p0j, void* qij, void* q0j, void* lambda, void* xsi, void* eta,
 327 int* n, int* m) {
 328 const dim3 nthrds(1024, 1, 1);
 329 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 331 (cudaStream_t)glb_cmd_queue >>> ((real*)rex, (real*)x, (real*)xlow,
 332 (real*)xupp, (real*)pij, (real*)p0j, (real*)qij, (real*)q0j,
 333 (real*)lambda, (real*)xsi, (real*)eta, *n, *m);
 335 }
336
337
// Host wrapper: launches the rey residual kernel (1024 threads/block,
// ceil(n/1024) blocks) over c, d, y, lambda, mu.
// NOTE(review): source lines 342 (kernel-launch opening line) and 345
// (likely a CUDA_CHECK error check) are missing from this listing.
 338 void cuda_rey(void* rey, void* c, void* d, void* y, void* lambda, void* mu,
 339 int* n) {
 340 const dim3 nthrds(1024, 1, 1);
 341 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 343 (cudaStream_t)glb_cmd_queue >>> ((real*)rey, (real*)c,
 344 (real*)d, (real*)y, (real*)lambda, (real*)mu, * n);
 346 }
347
348
// Host wrapper: launches sub2cons_kernel (1024 threads/block,
// ceil(n/1024) blocks) with three device arrays and the scalar *d.
// NOTE(review): source line 355 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 350 void cuda_sub2cons(void * a,void * b,void * c, real *d, int * n) {
 351 const dim3 nthrds(1024, 1, 1);
 352 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 353 sub2cons_kernel<real><<<nblcks, nthrds, 0,(cudaStream_t) glb_cmd_queue>>>
 354 ((real *) a, (real *) b, (real *) c, *d, *n);
 356 }
357
358
// Host wrapper: two-stage sum reduction over device array `a` (per-block
// partials, then one 1024-thread block), returning the value read from the
// host staging buffer mma_bufred[0].
// NOTE(review): this listing dropped the buffer free/realloc lines
// (368-372), the first-stage kernel launch line 374, and lines 376/378-381
// (likely CUDA_CHECK calls and the device-to-host copy/synchronize that
// makes reading mma_bufred[0] valid). Restore from the repository before
// editing.
 360 real cuda_norm(void* a, int* n) {
 361 const dim3 nthrds(1024, 1, 1);
 362 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 363 const int nb = ((*n) + 1024 - 1)/ 1024;
 364 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 365 if(nb > mma_red_s){
 366 mma_red_s = nb;
 367 if(mma_bufred != NULL){
 370 }
 373 }
 375 ((real*)a, mma_bufred_d, (*n));
 377 mmareduce_kernel<real> <<<1, 1024, 0, stream >>> (mma_bufred_d, nb);
 382 return mma_bufred[0];
 383 }
384
385
// Host wrapper: launches dely_kernel (1024 threads/block, ceil(n/1024)
// blocks) to compute the y-step terms from c, d, y, lambda and *epsi.
// NOTE(review): source line 392 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 386 void cuda_dely(void* dely, void* c, void* d, void* y, void* lambda,
 387 real* epsi, int* n) {
 388 const dim3 nthrds(1024, 1, 1);
 389 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 390 dely_kernel<real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 391 ((real*)dely,(real*)c, (real*)d, (real*)y, (real*)lambda,*epsi, * n);
 393 }
394
395
// Host wrapper: two-stage max reduction over a pairwise combination of
// arrays a and b scaled by *cons; returns the value read from the host
// staging buffer mma_bufred[0].
// NOTE(review): this listing dropped the buffer free/realloc lines
// (404-408), the first-stage kernel launch line 410, and lines 412/414-417
// (likely CUDA_CHECK calls and the device-to-host copy/synchronize that
// makes reading mma_bufred[0] valid). Restore from the repository before
// editing.
 396 real cuda_maxval2(void* a, void* b, real* cons, int* n) {
 397 const dim3 nthrds(1024, 1, 1);
 398 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 399 const int nb = ((*n) + 1024 - 1)/ 1024;
 400 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 401 if(nb > mma_red_s){
 402 mma_red_s = nb;
 403 if(mma_bufred != NULL) {
 406 }
 409 }
 411 ((real*)a, (real*)b, mma_bufred_d, *cons, *n);
 413 max_reduce_kernel<real> <<<1, 1024, 0,stream >>> (mma_bufred_d, nb);
 418 return mma_bufred[0];
 419 }
420
421
// Host wrapper: three-array variant of cuda_maxval2 — two-stage max
// reduction over a combination of a, b, c scaled by *cons; returns the
// value read from the host staging buffer mma_bufred[0].
// NOTE(review): this listing dropped the buffer free/realloc lines
// (430-434), the first-stage kernel launch line 436, and lines 439-442
// (likely CUDA_CHECK calls and the device-to-host copy/synchronize that
// makes reading mma_bufred[0] valid). Restore from the repository before
// editing.
 422 real cuda_maxval3(void* a, void* b, void* c, real* cons, int* n) {
 423 const dim3 nthrds(1024, 1, 1);
 424 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 425 const int nb = ((*n) + 1024 - 1)/ 1024;
 426 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 427 if(nb > mma_red_s){
 428 mma_red_s = nb;
 429 if(mma_bufred != NULL) {
 432 }
 435 }
 437 ((real*)a, (real*)b, (real*)c, mma_bufred_d, *cons, *n);
 438 max_reduce_kernel<real> <<<1, 1024, 0,stream >>> (mma_bufred_d, nb);
 443 return mma_bufred[0];
 444 }
445
446
// Host wrapper: launches kkt_rex_kernel (1024 threads/block, ceil(n/1024)
// blocks) to evaluate the KKT residual for x from the objective/constraint
// gradients and the xsi/eta/lambda multipliers.
// NOTE(review): source line 454 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 447 void cuda_kkt_rex(void* rex, void* df0dx, void* dfdx, void* xsi,
 448 void* eta, void* lambda, int* n, int* m) {
 449 const dim3 nthrds(1024, 1, 1);
 450 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 451 kkt_rex_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 452 ((real*)rex, (real*)df0dx, (real*)dfdx, (real*)xsi,
 453 (real*)eta, (real*)lambda, *n, *m);
 455 }
456
457
// Host wrapper: launches maxcons_kernel (1024 threads/block,
// ceil(n/1024) blocks) with scalars *b, *c and device arrays a, d.
// NOTE(review): source line 464 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 459 void cuda_maxcons(void* a, real* b, real* c, void* d, int* n) {
 460 const dim3 nthrds(1024, 1, 1);
 461 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 462 maxcons_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 463 ((real*)a, *b, *c, (real*)d, *n);
 465 }
466
467
// Host wrapper: two-stage sum reduction over a pairwise combination of
// arrays a and b (dot-product-style), returning the value read from the
// host staging buffer mma_bufred[0].
// NOTE(review): this listing dropped the buffer free/realloc lines
// (476-480), the first-stage kernel launch line 482, and lines 484/486-489
// (likely CUDA_CHECK calls and the device-to-host copy/synchronize that
// makes reading mma_bufred[0] valid). Restore from the repository before
// editing.
 468 real cuda_lcsc2(void *a, void*b, int *n) {
 469 const dim3 nthrds(1024, 1, 1);
 470 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
 471 const int nb = ((*n) + 1024 - 1)/ 1024;
 472 const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
 473 if ( nb > mma_red_s){
 474 mma_red_s = nb;
 475 if (mma_bufred != NULL) {
 478 }
 481 }
 483 ((real*)a, (real*)b, mma_bufred_d, (*n));
 485 mmareduce_kernel<real> <<<1, 1024, 0, stream>>> (mma_bufred_d, nb);
 490 return mma_bufred[0];
 491 }
492
493
// MPI all-reduce (sum) of a device buffer; a no-op unless the build has
// device-aware MPI enabled (HAVE_DEVICE_MPI).
// NOTE(review): source lines 497-498 (the device_mpi_allreduce call from
// device_mpi_reduce.h, per the includes above) are missing from this
// extracted listing — restore from the repository before editing.
 494 void cuda_mpisum(void *a, int *n) {
 495#ifdef HAVE_DEVICE_MPI
 496 real* temp=(real*)a;
 499#endif
 500 }
501
502
// Host wrapper: launches add2inv2_kernel (1024 threads/block,
// ceil(n/1024) blocks) with device arrays a, b and scalar *c.
// NOTE(review): source line 508 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 503 void cuda_add2inv2(void* a, void *b, real* c, int* n) {
 504 const dim3 nthrds(1024, 1, 1);
 505 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 506 add2inv2_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 507 ((real*)a, (real*) b, *c, *n);
 509 }
510
511
// Host wrapper: launches max2_kernel (1024 threads/block, ceil(n/1024)
// blocks) with device arrays a, c and scalars *b, *d.
// NOTE(review): source line 517 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 512 void cuda_max2(void* a, real* b, void* c, real* d, int* n) {
 513 const dim3 nthrds(1024, 1, 1);
 514 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 515 max2_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 516 ((real*)a, *b, (real*)c,*d, *n);
 518 }
519
520
// Host wrapper: launches updatebb_kernel over the (*m + 1) entries of the
// right-hand-side vector bb (note the grid is sized by m+1, not n).
// NOTE(review): source line 528 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 521 void cuda_updatebb(void* bb, void* dellambda, void* dely,void* d,
 522 void* mu, void* y, real* delz, int *m) {
 523 const dim3 nthrds(1024, 1, 1);
 524 const dim3 nblcks(((*m+1) + 1024 - 1) / 1024, 1, 1);
 525 updatebb_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 526 ((real*) bb, (real*) dellambda, (real*) dely,(real*) d,
 527 (real*) mu, (real*) y, *delz, *m);
 529 }
530
531
// Host wrapper: launches updateAA_kernel over (*m + 1) entries to fold the
// globaltmp_mm contribution and the s/lambda/d/mu/y/a terms plus scalars
// *zeta, *z into the system matrix AA.
// NOTE(review): source line 539 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 532 void cuda_updateAA(void* AA, void* globaltmp_mm, void* s, void* lambda,
 533 void* d, void*mu,void* y,void* a, real* zeta, real* z, int* m) {
 534 const dim3 nthrds(1024, 1, 1);
 535 const dim3 nblcks(((*m+1) + 1024 - 1) / 1024, 1, 1);
 536 updateAA_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 537 ((real*) AA,(real*) globaltmp_mm, (real*) s, (real*) lambda,(real*) d,
 538 (real*)mu,(real*) y, (real*)a, *zeta, *z, *m);
 540 }
541
542
// Host wrapper: launches dy_kernel (1024 threads/block, ceil(n/1024)
// blocks) to compute the y-variable step from dely, dlambda, d, mu, y.
// NOTE(review): source line 550 is missing from this extracted listing
// (likely a CUDA_CHECK error check).
 543 void cuda_dy(void* dy, void* dely, void* dlambda,void* d, void* mu,
 544 void* y, int* n) {
 545 const dim3 nthrds(1024, 1, 1);
 546 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
 547 dy_kernel <real> <<<nblcks, nthrds, 0, (cudaStream_t)glb_cmd_queue >>>
 548 ((real*) dy, (real*) dely, (real*) dlambda, (real*) d,
 549 (real*) mu,(real*) y, *n);
 551 }
552
553}/* extern "C" */
554
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)
void cuda_AA(void *AA, void *GG, void *diagx, int *n, int *m)
Definition mma.cu:261
void cuda_diagx(void *diagx, void *x, void *xsi, void *xlow, void *xupp, void *p0j, void *q0j, void *pij, void *qij, void *alpha, void *beta, void *eta, void *lambda, int *n, int *m)
Definition mma.cu:220
real cuda_lcsc2(void *a, void *b, int *n)
Definition mma.cu:468
void cuda_dx(void *dx, void *delx, void *diagx, void *GG, void *dlambda, int *n, int *m)
Definition mma.cu:292
real * mma_bufred
Definition mma.cu:14
void cuda_rex(void *rex, void *x, void *xlow, void *xupp, void *pij, void *p0j, void *qij, void *q0j, void *lambda, void *xsi, void *eta, int *n, int *m)
Definition mma.cu:325
void cuda_updatebb(void *bb, void *dellambda, void *dely, void *d, void *mu, void *y, real *delz, int *m)
Definition mma.cu:521
void cuda_deta(void *deta, void *eta, void *dx, void *x, void *beta, real *epsi, int *n)
Definition mma.cu:314
void cuda_add2inv2(void *a, void *b, real *c, int *n)
Definition mma.cu:503
void cuda_kkt_rex(void *rex, void *df0dx, void *dfdx, void *xsi, void *eta, void *lambda, int *n, int *m)
Definition mma.cu:447
void cuda_dy(void *dy, void *dely, void *dlambda, void *d, void *mu, void *y, int *n)
Definition mma.cu:543
void cuda_sub2cons2(void *a, void *b, void *c, void *d, real *e, int *n)
Definition mma.cu:150
void cuda_mma_max(void *xsi, void *x, void *alpha, int *n)
Definition mma.cu:107
real * mma_bufred_d
Definition mma.cu:15
void cuda_max2(void *a, real *b, void *c, real *d, int *n)
Definition mma.cu:512
void cuda_GG(void *GG, void *x, void *xlow, void *xupp, void *pij, void *qij, int *n, int *m)
Definition mma.cu:209
void mma_gensub1_cuda(void *low, void *upp, void *x, void *xmin, void *xmax, real *asyinit, int *n)
Definition mma.cu:97
void cuda_updateAA(void *AA, void *globaltmp_mm, void *s, void *lambda, void *d, void *mu, void *y, void *a, real *zeta, real *z, int *m)
Definition mma.cu:532
int mma_red_s
Definition mma.cu:13
real cuda_maxval2(void *a, void *b, real *cons, int *n)
Definition mma.cu:396
real cuda_maxval3(void *a, void *b, void *c, real *cons, int *n)
Definition mma.cu:422
void mma_gensub2_cuda(void *low, void *upp, void *x, void *xold1, void *xold2, void *xmin, void *xmax, real *asydecr, real *asyincr, int *n)
Definition mma.cu:80
void mma_gensub3_cuda(void *x, void *df0dx, void *dfdx, void *low, void *upp, void *xmin, void *xmax, void *alpha, void *beta, void *p0j, void *q0j, void *pij, void *qij, int *n, int *m)
Definition mma.cu:64
void cuda_delx(void *delx, void *x, void *xlow, void *xupp, void *pij, void *qij, void *p0j, void *q0j, void *alpha, void *beta, void *lambda, real *epsi, int *n, int *m)
Definition mma.cu:194
void cuda_bb(void *bb, void *GG, void *delx, void *diagx, int *n, int *m)
Definition mma.cu:233
void cuda_dxsi(void *dxsi, void *xsi, void *dx, void *x, void *alpha, real *epsi, int *n)
Definition mma.cu:303
void cuda_mpisum(void *a, int *n)
Definition mma.cu:494
void cuda_dely(void *dely, void *c, void *d, void *y, void *lambda, real *epsi, int *n)
Definition mma.cu:386
void cuda_relambda(void *relambda, void *x, void *xupp, void *xlow, void *pij, void *qij, int *n, int *m)
Definition mma.cu:116
void cuda_sub2cons(void *a, void *b, void *c, real *d, int *n)
Definition mma.cu:350
void mma_gensub4_cuda(void *x, void *low, void *upp, void *pij, void *qij, int *n, int *m, void *bi)
Definition mma.cu:17
real cuda_maxval(void *a, int *n)
Definition mma.cu:161
void cuda_rey(void *rey, void *c, void *d, void *y, void *lambda, void *mu, int *n)
Definition mma.cu:338
void cuda_maxcons(void *a, real *b, real *c, void *d, int *n)
Definition mma.cu:459
real cuda_norm(void *a, int *n)
Definition mma.cu:360