35#ifndef __NEKO_CUDA_MATH_EXT_KERNELS__
36#define __NEKO_CUDA_MATH_EXT_KERNELS__
// Masked copy: stores b[mask[i]-1] into a[mask[i]-1], i.e. the entry of b
// selected by mask element i is written to the same position in a.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): the leading "50" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
50 a[mask[i]-1] =
b[mask[i]-1];
// Masked add-constant: adds c to the entry of a at position mask[i]-1,
// in place. Here c is used as a scalar, not indexed.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): the leading "66" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
66 a[mask[i]-1] = a[mask[i]-1] + c;
// Masked reciprocal: replaces the entry of a at position mask[i]-1 with
// 1.0 divided by its current value, in place. No zero check is visible here.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): `1.0` is a double literal; if a's element type is float the
// division is evaluated in double and narrowed on store -- confirm intended.
// NOTE(review): the leading "82" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
82 a[mask[i]-1] = 1.0 / a[mask[i]-1];
// Masked in-place multiply: multiplies the entry of a at position mask[i]-1
// by the entry of b at the same position and stores the product back in a.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): the leading "98" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
98 a[mask[i]-1] = a[mask[i]-1] *
b[mask[i]-1];
// Masked three-operand multiply: stores b[mask[i]-1] * c[mask[i]-1] into
// a[mask[i]-1]; the previous value of that entry of a is overwritten.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): the leading "114" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
114 a[mask[i]-1] =
b[mask[i]-1] * c[mask[i]-1];
// Masked three-operand subtract: stores b[mask[i]-1] - c[mask[i]-1] into
// a[mask[i]-1]; the previous value of that entry of a is overwritten.
// The "-1" suggests mask holds 1-based indices -- TODO confirm against caller.
// NOTE(review): the leading "130" looks like a line number fused in by text
// extraction; the enclosing kernel and the definition of i are not visible.
130 a[mask[i]-1] =
b[mask[i]-1] - c[mask[i]-1];
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown, so the mapping formula cannot be documented from here.
//   f_min, f_max, q : scalars of type T passed by value.
//   X_out_d, X_in_d : pointers to T declared `__restrict__` (non-aliasing);
//                     presumably output and input buffers -- TODO confirm.
//   n               : int; presumably the element count -- TODO confirm.
// NOTE(review): X_in_d is not const-qualified although its name suggests it
// is read-only -- confirm against the body.
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a, b, c   : pointers to T declared `__restrict__` (non-aliasing).
//   size      : int; presumably the length of the arrays -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): b, c and mask are not const-qualified; if the body only
// reads them, const would document that -- confirm against the body.
__global__ void col3_mask_kernel(T *__restrict__ a, T *__restrict__ b, T *__restrict__ c, const int size, int *__restrict__ mask, const int mask_size)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a         : pointer to T declared `__restrict__` (non-aliasing).
//   c         : scalar of type T passed by value.
//   size      : int; presumably the length of a -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): mask is not const-qualified; if the body only reads it,
// const would document that -- confirm against the body.
__global__ void cadd_mask_kernel(T *__restrict__ a, const T c, const int size, int *__restrict__ mask, const int mask_size)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a, b, c   : pointers to T declared `__restrict__` (non-aliasing).
//   size      : int; presumably the length of the arrays -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): b, c and mask are not const-qualified; if the body only
// reads them, const would document that -- confirm against the body.
__global__ void sub3_mask_kernel(T *__restrict__ a, T *__restrict__ b, T *__restrict__ c, const int size, int *__restrict__ mask, const int mask_size)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a, b      : pointers to T declared `__restrict__` (non-aliasing).
//   size      : int; presumably the length of the arrays -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): b and mask are not const-qualified; if the body only reads
// them, const would document that -- confirm against the body.
__global__ void copy_mask_kernel(T *__restrict__ a, T *__restrict__ b, const int size, int *__restrict__ mask, const int mask_size)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a         : pointer to T declared `__restrict__` (non-aliasing).
//   size      : int; presumably the length of a -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): mask is not const-qualified; if the body only reads it,
// const would document that -- confirm against the body.
__global__ void invcol1_mask_kernel(T *__restrict__ a, const int size, int *__restrict__ mask, const int mask_size)
// Kernel entry point (`__global__`). Only the signature is visible in this
// view; the body and the declaration of T (presumably a template parameter)
// are not shown.
//   a, b      : pointers to T declared `__restrict__` (non-aliasing).
//   size      : int; presumably the length of the arrays -- TODO confirm.
//   mask      : pointer to int declared `__restrict__`.
//   mask_size : int; presumably the number of entries in mask -- TODO confirm.
// NOTE(review): b and mask are not const-qualified; if the body only reads
// them, const would document that -- confirm against the body.
__global__ void col2_mask_kernel(T *__restrict__ a, T *__restrict__ b, const int size, int *__restrict__ mask, const int mask_size)