Neko-TOP
A portable framework for high-order spectral element flow topology optimization.
Loading...
Searching...
No Matches
math_ext.cu
Go to the documentation of this file.
1
37// System includes
38#include <stdio.h>
39#include <stdlib.h>
40
41// Device includes
42#include <cuda_runtime.h>
43
44// Neko includes
45#include <neko/device/device_config.h>
46#include <neko/device/cuda/check.h>
47#include <neko/math/bcknd/device/device_mpi_op.h>
48#include <neko/math/bcknd/device/device_mpi_reduce.h>
49
50// Local includes
51#include "math_ext_kernel.h"
52
53extern "C" {
54
58void cuda_copy_mask(void* a, void* b, int* size, int* mask, int* mask_size) {
59
60 const dim3 nthrds(1024, 1, 1);
61 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
62
63 if (*mask_size == 0) return;
65 (real*)a, (real*)b, *size, mask, *mask_size);
67}
68
72void cuda_cadd_mask(void* a, real* c, int* size, int* mask, int* mask_size) {
73
74 const dim3 nthrds(1024, 1, 1);
75 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
76
77 if (*mask_size == 0) return;
79 (real*)a, *c, *size, mask, *mask_size);
81}
82
86void cuda_invcol1_mask(void* a, int* size, int* mask, int* mask_size) {
87
88 const dim3 nthrds(1024, 1, 1);
89 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
90
91 if (*mask_size == 0) return;
94 (real*)a, *size, mask, *mask_size);
96}
97
101void cuda_col2_mask(void* a, void* b, int* size, int* mask, int* mask_size) {
102
103 const dim3 nthrds(1024, 1, 1);
104 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
105
106 if (*mask_size == 0) return;
108 (real*)a, (real*)b, *size, mask, *mask_size);
110}
111
116 void* a, void* b, void* c, int* size, int* mask, int* mask_size) {
117
118 const dim3 nthrds(1024, 1, 1);
119 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
120
121 if (*mask_size == 0) return;
123 (real*)a, (real*)b, (real*)c, *size, mask, *mask_size);
125}
126
131 void* a, void* b, void* c, int* size, int* mask, int* mask_size) {
132
133 const dim3 nthrds(1024, 1, 1);
134 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
135
136 if (*mask_size == 0) return;
138 (real*)a, (real*)b, (real*)c, *size, mask, *mask_size);
140}
141}
__global__ void convex_down_RAMP_mapping_apply_kernel(const T f_min, const T f_max, const T q, T *__restrict__ X_out_d, T *__restrict__ X_in_d, const int n)
void cuda_col2_mask(void *a, void *b, int *size, int *mask, int *mask_size)
Definition math_ext.cu:101
void cuda_invcol1_mask(void *a, int *size, int *mask, int *mask_size)
Definition math_ext.cu:86
void cuda_cadd_mask(void *a, real *c, int *size, int *mask, int *mask_size)
Definition math_ext.cu:72
void cuda_col3_mask(void *a, void *b, void *c, int *size, int *mask, int *mask_size)
Definition math_ext.cu:115
void cuda_sub3_mask(void *a, void *b, void *c, int *size, int *mask, int *mask_size)
Definition math_ext.cu:130
void cuda_copy_mask(void *a, void *b, int *size, int *mask, int *mask_size)
Definition math_ext.cu:58