Neko-TOP
A portable framework for high-order spectral element flow topology optimization.
math_ext_kernel.h
#ifndef __NEKO_HIP_MATH_EXT_KERNELS__
#define __NEKO_HIP_MATH_EXT_KERNELS__

/** Masked copy: a(mask(i)) = b(mask(i)); mask holds 1-based (Fortran) indices. */
template <typename T>
__global__ void copy_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = b[mask[i]-1];
  }
}

/** Masked addition of a scalar constant: a(mask(i)) = a(mask(i)) + c. */
template <typename T>
__global__ void cadd_mask_kernel(
    T* __restrict__ a, const T c, const int size, int* __restrict__ mask,
    const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = a[mask[i]-1] + c;
  }
}

/** Masked pointwise inversion: a(mask(i)) = 1 / a(mask(i)). */
template <typename T>
__global__ void invcol1_mask_kernel(
    T* __restrict__ a, const int size, int* __restrict__ mask,
    const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = 1.0 / a[mask[i]-1];
  }
}

/** Masked pointwise multiplication: a(mask(i)) = a(mask(i)) * b(mask(i)). */
template <typename T>
__global__ void col2_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = a[mask[i]-1] * b[mask[i]-1];
  }
}

/** Masked pointwise product: a(mask(i)) = b(mask(i)) * c(mask(i)). */
template <typename T>
__global__ void col3_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, T* __restrict__ c, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = b[mask[i]-1] * c[mask[i]-1];
  }
}

/** Masked pointwise subtraction: a(mask(i)) = b(mask(i)) - c(mask(i)). */
template <typename T>
__global__ void sub3_mask_kernel(
    T* __restrict__ a, T* __restrict__ b, T* __restrict__ c, const int size,
    int* __restrict__ mask, const int mask_size) {

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  const int str = blockDim.x * gridDim.x;

  for (int i = idx; i < mask_size; i += str) {
    a[mask[i]-1] = b[mask[i]-1] - c[mask[i]-1];
  }
}

#endif // __NEKO_HIP_MATH_EXT_KERNELS__
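
For reference, a host-side launch of one of these kernels could look like the sketch below. It is an illustration, not part of the Neko-TOP sources: the wrapper name copy_mask_host, the 1024-thread block size, and the explicit stream argument are assumptions, and the other kernels launch the same way with their respective arguments. Because the mask arrives from the Fortran side of Neko, it holds 1-based indices, which is why every kernel subtracts one before indexing.

// Hypothetical host-side wrapper (not from the Neko-TOP sources).
// a_d and b_d are device arrays of length n; mask_d holds m
// one-based indices into those arrays.
#include <hip/hip_runtime.h>

template <typename T>
void copy_mask_host(T* a_d, T* b_d, const int n,
                    int* mask_d, const int m, hipStream_t stream) {
  const dim3 nthrds(1024);                  // assumed threads per block
  const dim3 nblcks((m + 1024 - 1) / 1024); // enough blocks to cover the mask
  hipLaunchKernelGGL(copy_mask_kernel<T>, nblcks, nthrds, 0, stream,
                     a_d, b_d, n, mask_d, m);
}

Since the kernels use a grid-stride loop over mask_size, the block count only affects occupancy, not correctness; a smaller fixed grid would also produce the same result.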