36#include <device/cuda/check.h>
37#include <device/device_config.h>
43#include <math/bcknd/device/device_mpi_op.h>
44#include <math/bcknd/device/device_mpi_reduce.h>
49 void* X_out_d,
void* X_in_d,
int* n) {
51 const dim3 nthrds(1024, 1, 1);
52 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
65 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
79 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
92 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
void cuda_convex_up_RAMP_mapping_apply(real *f_min, real *f_max, real *q, void *X_out_d, void *X_in_d, int *n)
void cuda_convex_up_RAMP_mapping_apply_backward(real *f_min, real *f_max, real *q, void *dF_dX_in_d, void *dF_dX_out_d, void *X_in_d, int *n)
void cuda_convex_down_RAMP_mapping_apply(real *f_min, real *f_max, real *q, void *X_out_d, void *X_in_d, int *n)
void cuda_convex_down_RAMP_mapping_apply_backward(real *f_min, real *f_max, real *q, void *dF_dX_in_d, void *dF_dX_out_d, void *X_in_d, int *n)
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)