35#include <hip/hip_runtime.h>
36#include "math_ext_kernel.h"
37#include <device/hip/check.h>
38#include <device/device_config.h>
44#include <math/bcknd/device/device_mpi_op.h>
45#include <math/bcknd/device/device_mpi_reduce.h>
50void hip_copy_mask(
void* a,
void* b,
int* size,
int* mask,
int* mask_size) {
51 const dim3 nthrds(1024, 1, 1);
52 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
54 if(*mask_size == 0)
return;
55 hipLaunchKernelGGL(copy_mask_kernel<real>, nblcks, nthrds, 0,
56 (hipStream_t)glb_cmd_queue,
57 (real*)a, (real*)b, *size, mask, *mask_size);
58 HIP_CHECK(hipGetLastError());
64void hip_cadd_mask(
void* a, real* c,
int* size,
int* mask,
int* mask_size) {
65 const dim3 nthrds(1024, 1, 1);
66 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
68 if(*mask_size == 0)
return;
69 hipLaunchKernelGGL(cadd_mask_kernel<real>, nblcks, nthrds, 0,
70 (hipStream_t)glb_cmd_queue, (real*)a, *c, *size, mask, *mask_size);
71 HIP_CHECK(hipGetLastError());
77void hip_invcol1_mask(
void* a,
int* size,
int* mask,
int* mask_size) {
78 const dim3 nthrds(1024, 1, 1);
79 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
81 if(*mask_size == 0)
return;
82 hipLaunchKernelGGL(invcol1_mask_kernel<real>, nblcks, nthrds, 0,
83 (hipStream_t)glb_cmd_queue, (real*)a, *size, mask, *mask_size);
84 HIP_CHECK(hipGetLastError());
90void hip_col2_mask(
void* a,
void* b,
int* size,
int* mask,
int* mask_size) {
91 const dim3 nthrds(1024, 1, 1);
92 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
94 if(*mask_size == 0)
return;
95 hipLaunchKernelGGL(col2_mask_kernel<real>, nblcks, nthrds, 0,
96 (hipStream_t)glb_cmd_queue,
97 (real*)a, (real*)b, *size, mask, *mask_size);
98 HIP_CHECK(hipGetLastError());
105 void* a,
void* b,
void* c,
int* size,
int* mask,
int* mask_size) {
// Host-side launcher for col3_mask_kernel<real>.
// NOTE(review): the signature line carrying the return type and function name
// is not visible in this view -- confirm it against the header.
// a, b and c are reinterpreted as real* for the kernel; size and mask_size are
// dereferenced on the host and passed by value; mask is forwarded unchanged.
// Launch shape: 1024 threads per block in x; the block count is
// (*mask_size + 1023) / 1024 using integer division.
107 const dim3 nthrds(1024, 1, 1);
108 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
// An empty mask needs no launch (it would otherwise request zero blocks).
110 if(*mask_size == 0)
return;
// Enqueue on glb_cmd_queue (cast to hipStream_t) with 0 bytes of dynamic
// shared memory.
111 hipLaunchKernelGGL(col3_mask_kernel<real>, nblcks, nthrds, 0,
112 (hipStream_t)glb_cmd_queue,
113 (real*)a, (real*)b, (real*)c, *size, mask, *mask_size);
// Report launch-configuration errors right away.
114 HIP_CHECK(hipGetLastError());
121 void* a,
void* b,
void* c,
int* size,
int* mask,
int* mask_size) {
// Host-side launcher for sub3_mask_kernel<real>.
// NOTE(review): the signature line carrying the return type and function name
// is not visible in this view -- confirm it against the header.
// a, b and c are reinterpreted as real* for the kernel; size and mask_size are
// dereferenced on the host and passed by value; mask is forwarded unchanged.
// Launch shape: 1024 threads per block in x; the block count is
// (*mask_size + 1023) / 1024 using integer division.
123 const dim3 nthrds(1024, 1, 1);
124 const dim3 nblcks(((*mask_size) + 1024 - 1) / 1024, 1, 1);
// An empty mask needs no launch (it would otherwise request zero blocks).
126 if(*mask_size == 0)
return;
// Enqueue on glb_cmd_queue (cast to hipStream_t) with 0 bytes of dynamic
// shared memory.
127 hipLaunchKernelGGL(sub3_mask_kernel<real>, nblcks, nthrds, 0,
128 (hipStream_t)glb_cmd_queue,
129 (real*)a, (real*)b, (real*)c, *size, mask, *mask_size);
// Report launch-configuration errors right away.
130 HIP_CHECK(hipGetLastError());