Neko-TOP
A portable framework for high-order spectral element flow topology optimization.
math_ext_kernel.h
#ifndef __NEKO_CUDA_MATH_EXT_KERNELS__
#define __NEKO_CUDA_MATH_EXT_KERNELS__

/** Masked copy: a[mask[i]] = b[mask[i]] for each of the mask_size indices. */
template <typename T>
__global__ void copy_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, const int size,
    int* __restrict__ mask, const int mask_size) {

  /* Grid-stride loop: each thread handles every str-th masked entry, so any
     launch configuration covers the whole mask. */
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = b[mask[i]];
  }
}

/** Masked add of a constant: a[mask[i]] = a[mask[i]] + c. */
template <typename T>
__global__ void cadd_mask_kernel(
    T* __restrict__ a, const T c, const int size, int* __restrict__ mask,
    const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = a[mask[i]] + c;
  }
}

/** Masked reciprocal: a[mask[i]] = 1 / a[mask[i]]. */
template <typename T>
__global__ void invcol1_mask_kernel(
    T* __restrict__ a, const int size, int* __restrict__ mask,
    const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = 1.0 / a[mask[i]];
  }
}

/** Masked pointwise product: a[mask[i]] = a[mask[i]] * b[mask[i]]. */
template <typename T>
__global__ void col2_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = a[mask[i]] * b[mask[i]];
  }
}

/** Masked pointwise product into a: a[mask[i]] = b[mask[i]] * c[mask[i]]. */
template <typename T>
__global__ void col3_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, T* __restrict__ c, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = b[mask[i]] * c[mask[i]];
  }
}

/** Masked pointwise difference: a[mask[i]] = b[mask[i]] - c[mask[i]]. */
template <typename T>
__global__ void sub3_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, T* __restrict__ c, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]] = b[mask[i]] - c[mask[i]];
  }
}

#endif // __NEKO_CUDA_MATH_EXT_KERNELS__
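
All six kernels follow the same pattern: mask holds zero-based indices into the full-length device arrays, and a grid-stride loop walks the mask so that any launch configuration covers every masked entry (the full-array length size is carried in the interface but not read by the loops). A minimal host-side sketch of driving two of these kernels follows; the array length, mask contents, block size of 256, and the main scaffolding are illustrative assumptions, not part of Neko-TOP.

/* A minimal sketch, assuming this header is available on the include path
 * as "math_ext_kernel.h" and the file is compiled with nvcc. */
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include "math_ext_kernel.h"

int main() {
  const int n = 1024;                /* full array length (illustrative) */
  std::vector<int> h_mask;           /* mask: every other element */
  for (int i = 0; i < n; i += 2) h_mask.push_back(i);
  const int mask_size = (int) h_mask.size();

  std::vector<double> h_a(n, 0.0), h_b(n, 2.0);

  double *d_a, *d_b;
  int *d_mask;
  cudaMalloc(&d_a, n * sizeof(double));
  cudaMalloc(&d_b, n * sizeof(double));
  cudaMalloc(&d_mask, mask_size * sizeof(int));
  cudaMemcpy(d_a, h_a.data(), n * sizeof(double), cudaMemcpyHostToDevice);
  cudaMemcpy(d_b, h_b.data(), n * sizeof(double), cudaMemcpyHostToDevice);
  cudaMemcpy(d_mask, h_mask.data(), mask_size * sizeof(int),
             cudaMemcpyHostToDevice);

  /* One thread per masked entry, rounded up; the grid-stride loop makes
     the exact grid size uncritical. */
  const int block = 256;
  const int grid = (mask_size + block - 1) / block;

  copy_mask_kernel<double><<<grid, block>>>(d_a, d_b, n, d_mask, mask_size);
  cadd_mask_kernel<double><<<grid, block>>>(d_a, 1.0, n, d_mask, mask_size);
  cudaDeviceSynchronize();

  cudaMemcpy(h_a.data(), d_a, n * sizeof(double), cudaMemcpyDeviceToHost);
  printf("a[0] = %f, a[1] = %f\n", h_a[0], h_a[1]); /* expect 3.0 and 0.0 */

  cudaFree(d_a); cudaFree(d_b); cudaFree(d_mask);
  return 0;
}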
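
Because the loops are grid-stride, the grid also does not need one thread per masked entry: a host wrapper can cap the block count and let each thread take several strides, which bounds launch overhead for very large masks. A sketch of such a wrapper, where the name cadd_mask_on_device, the 1024-block cap, and the 256-thread block size are all assumptions rather than anything defined by this header:

/* Hypothetical host-side wrapper; caps the grid at 1024 blocks and relies
 * on the kernel's grid-stride loop to cover any remaining masked entries. */
#include <algorithm>
#include <cuda_runtime.h>
#include "math_ext_kernel.h"

template <typename T>
void cadd_mask_on_device(T* a_d, T c, int size, int* mask_d, int mask_size,
                         cudaStream_t stream = 0) {
  if (mask_size <= 0) return;       /* nothing to do; avoid a zero-size grid */
  const int block = 256;
  const int grid = std::min((mask_size + block - 1) / block, 1024);
  cadd_mask_kernel<T><<<grid, block, 0, stream>>>(a_d, c, size, mask_d,
                                                  mask_size);
}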