38#ifndef __NEKO_CUDA_MATH_EXT_KERNELS__
39#define __NEKO_CUDA_MATH_EXT_KERNELS__
53 a[mask[i]] =
b[mask[i]];
69 a[mask[i]] = a[mask[i]] + c;
85 a[mask[i]] = 1.0 / a[mask[i]];
101 a[mask[i]] = a[mask[i]] *
b[mask[i]];
117 a[mask[i]] =
b[mask[i]] * c[mask[i]];
133 a[mask[i]] =
b[mask[i]] - c[mask[i]];
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)
__global__ void col3_mask_kernel(T *__restrict__ a, T *__restrict__ b, T *__restrict__ c, const int size, int *__restrict__ mask, const int mask_size)
__global__ void cadd_mask_kernel(T *__restrict__ a, const T c, const int size, int *__restrict__ mask, const int mask_size)
__global__ void sub3_mask_kernel(T *__restrict__ a, T *__restrict__ b, T *__restrict__ c, const int size, int *__restrict__ mask, const int mask_size)
__global__ void copy_mask_kernel(T *__restrict__ a, T *__restrict__ b, const int size, int *__restrict__ mask, const int mask_size)
__global__ void invcol1_mask_kernel(T *__restrict__ a, const int size, int *__restrict__ mask, const int mask_size)
__global__ void col2_mask_kernel(T *__restrict__ a, T *__restrict__ b, const int size, int *__restrict__ mask, const int mask_size)