1#include "device/cuda/check.h"
8#include "math/bcknd/device/device_mpi_reduce.h"
9#include "math/bcknd/device/device_mpi_op.h"
10#include "device/device_config.h"
18 int* n,
int* m,
void* bi) {
20 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
21 const int nb = ((*n) + 1024 - 1) / 1024;
44 for (
int i = 0; i < (*m); i++) {
65 void* upp,
void* xmin,
void* xmax,
void* alpha,
void* beta,
66 void* p0j,
void* q0j,
void* pij,
void* qij,
int* n,
int* m) {
68 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
81 void* xold2,
void* xmin,
void* xmax,
real* asydecr,
82 real* asyincr,
int* n) {
84 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
98 real* asyinit,
int* n) {
100 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
109 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
117 void* pij,
void* qij,
int* n,
int* m) {
119 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
120 const int nb = ((*n) + 1024 - 1)/ 1024;
136 for (
int i = 0; i < (*m); i++) {
153 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
164 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
165 const int nb = ((*n) + 1024 - 1) / 1024;
195 void* qij,
void* p0j,
void* q0j,
void* alpha,
void* beta,
void* lambda,
199 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
210 void* pij,
void* qij,
int* n,
int* m) {
212 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
221 void* p0j,
void* q0j,
void* pij,
void* qij,
void* alpha,
void* beta,
222 void* eta,
void* lambda,
int *n,
int *m) {
224 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
235 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
236 const int nb = ((*n) + 1024 - 1)/ 1024;
248 for (
int i = 0; i < (*m); i++) {
263 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
264 const int nb = ((*n) + 1024 - 1)/ 1024;
276 for (
int i = 0; i < (*m); i++){
277 for (
int j=0;
j<(*m);
j++){
295 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
306 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
317 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
326 void* p0j,
void* qij,
void* q0j,
void* lambda,
void* xsi,
void* eta,
329 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
338 void cuda_rey(
void*
rey,
void* c,
void* d,
void* y,
void* lambda,
void* mu,
341 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
352 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
362 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
363 const int nb = ((*n) + 1024 - 1)/ 1024;
389 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
398 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
399 const int nb = ((*n) + 1024 - 1)/ 1024;
424 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
425 const int nb = ((*n) + 1024 - 1)/ 1024;
448 void* eta,
void* lambda,
int* n,
int* m) {
450 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
461 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
470 const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
471 const int nb = ((*n) + 1024 - 1)/ 1024;
495#ifdef HAVE_DEVICE_MPI
505 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
514 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
522 void* mu,
void* y,
real*
delz,
int *m) {
524 const dim3 nblcks(((*m+1) + 1024 - 1) / 1024, 1, 1);
533 void* d,
void*mu,
void* y,
void* a,
real* zeta,
real* z,
int* m) {
535 const dim3 nblcks(((*m+1) + 1024 - 1) / 1024, 1, 1);
546 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)
void cuda_AA(void *AA, void *GG, void *diagx, int *n, int *m)
void cuda_diagx(void *diagx, void *x, void *xsi, void *xlow, void *xupp, void *p0j, void *q0j, void *pij, void *qij, void *alpha, void *beta, void *eta, void *lambda, int *n, int *m)
real cuda_lcsc2(void *a, void *b, int *n)
void cuda_dx(void *dx, void *delx, void *diagx, void *GG, void *dlambda, int *n, int *m)
void cuda_rex(void *rex, void *x, void *xlow, void *xupp, void *pij, void *p0j, void *qij, void *q0j, void *lambda, void *xsi, void *eta, int *n, int *m)
void cuda_updatebb(void *bb, void *dellambda, void *dely, void *d, void *mu, void *y, real *delz, int *m)
void cuda_deta(void *deta, void *eta, void *dx, void *x, void *beta, real *epsi, int *n)
void cuda_add2inv2(void *a, void *b, real *c, int *n)
void cuda_kkt_rex(void *rex, void *df0dx, void *dfdx, void *xsi, void *eta, void *lambda, int *n, int *m)
void cuda_dy(void *dy, void *dely, void *dlambda, void *d, void *mu, void *y, int *n)
void cuda_sub2cons2(void *a, void *b, void *c, void *d, real *e, int *n)
void cuda_mma_max(void *xsi, void *x, void *alpha, int *n)
void cuda_max2(void *a, real *b, void *c, real *d, int *n)
void cuda_GG(void *GG, void *x, void *xlow, void *xupp, void *pij, void *qij, int *n, int *m)
void mma_gensub1_cuda(void *low, void *upp, void *x, void *xmin, void *xmax, real *asyinit, int *n)
void cuda_updateAA(void *AA, void *globaltmp_mm, void *s, void *lambda, void *d, void *mu, void *y, void *a, real *zeta, real *z, int *m)
real cuda_maxval2(void *a, void *b, real *cons, int *n)
real cuda_maxval3(void *a, void *b, void *c, real *cons, int *n)
void mma_gensub2_cuda(void *low, void *upp, void *x, void *xold1, void *xold2, void *xmin, void *xmax, real *asydecr, real *asyincr, int *n)
void mma_gensub3_cuda(void *x, void *df0dx, void *dfdx, void *low, void *upp, void *xmin, void *xmax, void *alpha, void *beta, void *p0j, void *q0j, void *pij, void *qij, int *n, int *m)
void cuda_delx(void *delx, void *x, void *xlow, void *xupp, void *pij, void *qij, void *p0j, void *q0j, void *alpha, void *beta, void *lambda, real *epsi, int *n, int *m)
void cuda_bb(void *bb, void *GG, void *delx, void *diagx, int *n, int *m)
void cuda_dxsi(void *dxsi, void *xsi, void *dx, void *x, void *alpha, real *epsi, int *n)
void cuda_mpisum(void *a, int *n)
void cuda_dely(void *dely, void *c, void *d, void *y, void *lambda, real *epsi, int *n)
void cuda_relambda(void *relambda, void *x, void *xupp, void *xlow, void *pij, void *qij, int *n, int *m)
void cuda_sub2cons(void *a, void *b, void *c, real *d, int *n)
void mma_gensub4_cuda(void *x, void *low, void *upp, void *pij, void *qij, int *n, int *m, void *bi)
real cuda_maxval(void *a, int *n)
void cuda_rey(void *rey, void *c, void *d, void *y, void *lambda, void *mu, int *n)
void cuda_maxcons(void *a, real *b, real *c, void *d, int *n)
real cuda_norm(void *a, int *n)