42#include <cuda_runtime.h>
45#include <neko/device/cuda/check.h>
46#include <neko/device/device_config.h>
56 void* X_out_d,
void* X_in_d,
int* n) {
58 const dim3 nthrds(1024, 1, 1);
59 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
73 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
88 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
102 const dim3 nblcks(((*n) + 1024 - 1) / 1024, 1, 1);
void cuda_convex_up_RAMP_mapping_apply(real *f_min, real *f_max, real *q, void *X_out_d, void *X_in_d, int *n)
void cuda_convex_down_RAMP_mapping_apply_backward(real *f_min, real *f_max, real *q, void *sens_out_d, void *sens_in_d, void *X_in_d, int *n)
void cuda_convex_down_RAMP_mapping_apply(real *f_min, real *f_max, real *q, void *X_out_d, void *X_in_d, int *n)
void cuda_convex_up_RAMP_mapping_apply_backward(real *f_min, real *f_max, real *q, void *sens_out_d, void *sens_in_d, void *X_in_d, int *n)
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)