-
Notifications
You must be signed in to change notification settings - Fork 0
/
ex06.cu
89 lines (71 loc) · 2.08 KB
/
ex06.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// CUDA programming
// Exercise n. 06
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
// Launch configuration used by main(): BLOCKS is passed as the grid size
// (number of blocks) and THREADS as the block size (threads per block) of the
// saxpy launch. main() also sizes its arrays as BLOCKS * THREADS elements,
// i.e. one element per launched thread.
#define BLOCKS 4
#define THREADS 4
// Prototypes
// Device kernel: stores a * x[i] + y[i] into z[i] for indices below N.
__global__ void saxpy(float a, float *x, float *y, float *z, int N);
// Host helper: fills the first N elements of m with i / (i + 1).
__host__ void initialize_array(float *m, int N);
// Host helper: prints one "z = a x x + y" line per element, then a blank line.
__host__ void print_saxpy(float a, float *x, float *y, float *z, int N);
// Terminate the program with a diagnostic when a CUDA runtime call fails.
//   status: value returned by the CUDA runtime call being checked
//   what:   short label identifying the call, included in the message
// On failure the process exits with EXIT_FAILURE; outstanding host and device
// allocations are left for process teardown to reclaim.
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Entry point: computes z = a * x + y on the GPU for BLOCKS * THREADS
// elements and prints every term of the equation.
// Returns EXIT_SUCCESS on success; returns/exits with EXIT_FAILURE when a host
// allocation or any CUDA runtime call fails.
int main(void)
{
    float *x, *y, *z, a;     // host copies of x, y, z and the scalar a
    float *d_x, *d_y, *d_z;  // device copies of x, y, z
    const int N = BLOCKS * THREADS;
    const size_t size = (size_t)N * sizeof(float);

    // Allocate space for host copies of x, y, z
    x = (float *)malloc(size);
    y = (float *)malloc(size);
    z = (float *)malloc(size);
    if (x == NULL || y == NULL || z == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)size);
        // free(NULL) is a no-op, so releasing all three is safe
        free(x);
        free(y);
        free(z);
        return EXIT_FAILURE;
    }

    // Setup input values
    initialize_array(x, N);
    initialize_array(y, N);
    a = 3.0f / 2.5f;

    // Allocate space for device copies of x, y, z
    check_cuda(cudaMalloc((void **)&d_x, size), "cudaMalloc d_x");
    check_cuda(cudaMalloc((void **)&d_y, size), "cudaMalloc d_y");
    check_cuda(cudaMalloc((void **)&d_z, size), "cudaMalloc d_z");

    // Copy inputs to device
    check_cuda(cudaMemcpy(d_x, x, size, cudaMemcpyHostToDevice), "cudaMemcpy x -> d_x");
    check_cuda(cudaMemcpy(d_y, y, size, cudaMemcpyHostToDevice), "cudaMemcpy y -> d_y");

    // Call the kernel on GPU
    saxpy<<< BLOCKS, THREADS >>>(a, d_x, d_y, d_z, N);
    // A launch does not return a status itself; query it explicitly
    check_cuda(cudaGetLastError(), "saxpy launch");

    // Copy result back to host (this blocking copy also reports errors raised
    // while the kernel was executing)
    check_cuda(cudaMemcpy(z, d_z, size, cudaMemcpyDeviceToHost), "cudaMemcpy d_z -> z");
    print_saxpy(a, x, y, z, N);

    // Cleanup
    free(x);
    free(y);
    free(z);
    check_cuda(cudaFree(d_x), "cudaFree d_x");
    check_cuda(cudaFree(d_y), "cudaFree d_y");
    check_cuda(cudaFree(d_z), "cudaFree d_z");
    return EXIT_SUCCESS;
}
// Single-precision A*X Plus Y (device kernel).
// Each thread derives one flat index from its block and thread coordinates
// (blockIdx.x * blockDim.x + threadIdx.x) and, when that index is below N,
// stores a * x[index] + y[index] into z[index].
//   a:       scalar multiplier
//   x, y:    input arrays, read at the thread's index
//   z:       output array, written at the thread's index
//   N:       number of elements to process; threads at or past N do nothing
__global__ void saxpy(float a, float *x, float *y, float *z, int N)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads mapped beyond the end of the arrays have no element to handle
    if (gid >= N)
    {
        return;
    }

    z[gid] = a * x[gid] + y[gid];
}
// Host function that fills an array with the sequence k / (k + 1).
//   m: destination array; its first N elements are overwritten
//   N: number of elements to write (nothing is written when N <= 0)
// The quotient is evaluated in double precision and narrowed to float on store.
__host__ void initialize_array(float *m, int N)
{
    int k = 0;
    while (k < N)
    {
        const double ratio = k / (k + 1.0);
        m[k] = (float)ratio;
        ++k;
    }
}
// Host function that prints every term of the equation z = a x x + y.
//   a:       scalar multiplier shown on each line
//   x, y, z: arrays whose first N elements are printed
//   N:       number of lines to print
// Writes one formatted line per element to stdout, followed by an empty line.
__host__ void print_saxpy(float a, float *x, float *y, float *z, int N)
{
    int row = 0;
    while (row < N)
    {
        const float result = z[row];
        const float x_term = x[row];
        const float y_term = y[row];
        printf("%5.2f = %5.2f x %5.2f + %5.2f\n", result, a, x_term, y_term);
        ++row;
    }
    printf("\n");
}