-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathomp-scan.cpp
More file actions
49 lines (42 loc) · 1.34 KB
/
omp-scan.cpp
File metadata and controls
49 lines (42 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>
#include <omp.h>
// Sequential exclusive prefix sum (reference implementation).
//
// On return prefix_sum[0] == 0 and, for 1 <= i < n,
// prefix_sum[i] == A[0] + A[1] + ... + A[i-1]. The last input element,
// A[n-1], is never read. Sums are accumulated in `long` to leave headroom
// against overflow.
//
// prefix_sum: output array with room for n elements
// A:          input array of n elements
// n:          element count; nothing is written when n == 0
void scan_seq(long* prefix_sum, const long* A, long n) {
  if (n == 0) return;

  long running = 0;
  prefix_sum[0] = running;
  for (long k = 1; k < n; ++k) {
    running += A[k - 1];
    prefix_sum[k] = running;
  }
}
// Parallel exclusive prefix sum; produces the same output as scan_seq.
//
// On return prefix_sum[0] == 0 and, for 1 <= i < n,
// prefix_sum[i] == A[0] + A[1] + ... + A[i-1].
//
// The input is cut into fixed-length chunks and processed in three steps:
//   1. in parallel, add up the elements of every chunk except the last
//      (the last chunk's total is not needed by anyone);
//   2. serially, turn those totals into a starting offset per chunk
//      (the shared offset vector has only one entry per chunk, so this
//      step is cheap);
//   3. in parallel, run a sequential scan inside each chunk, seeded with
//      that chunk's offset.
//
// Only OpenMP pragmas are used, no OpenMP runtime calls, so the function
// also builds and runs (serially) when OpenMP is disabled. Inputs that
// fit in a single chunk are therefore handled by one thread.
//
// prefix_sum: output array with room for n elements; expected not to
//             overlap A
// A:          input array of n elements
// n:          element count; nothing is written when n <= 0
//
// Note: additions are grouped per chunk rather than strictly left to
// right, so every per-chunk total must itself fit in a long.
void scan_omp(long* prefix_sum, const long* A, long n) {
  if (n <= 0) return;

  // Chunk length is independent of the thread count; with a static
  // schedule each thread is handed a contiguous run of chunks.
  const long chunk_len = 1L << 16;
  const long num_chunks = (n + chunk_len - 1) / chunk_len;

  // chunk_base[c] ends up as the sum of every element located before
  // chunk c, i.e. the value of prefix_sum at the first index of chunk c.
  std::vector<long> chunk_base(static_cast<std::size_t>(num_chunks), 0L);

  // Step 1: totals of all chunks but the last. Each of these chunks is
  // full length, and its total is stored in the slot of the NEXT chunk.
  const long full_chunks = num_chunks - 1;
  #pragma omp parallel for schedule(static)
  for (long c = 0; c < full_chunks; ++c) {
    const long lo = c * chunk_len;
    const long hi = lo + chunk_len;
    long total = 0;
    for (long i = lo; i < hi; ++i) total += A[i];
    chunk_base[c + 1] = total;
  }

  // Step 2: running sum over the chunk totals gives each chunk's offset.
  for (long c = 1; c < num_chunks; ++c) {
    chunk_base[c] += chunk_base[c - 1];
  }

  // Step 3: independent scan of every chunk, starting from its offset.
  #pragma omp parallel for schedule(static)
  for (long c = 0; c < num_chunks; ++c) {
    const long lo = c * chunk_len;
    const long hi = std::min(lo + chunk_len, n);
    long acc = chunk_base[c];
    prefix_sum[lo] = acc;
    for (long i = lo + 1; i < hi; ++i) {
      acc += A[i - 1];
      prefix_sum[i] = acc;
    }
  }
}
// Benchmark driver: fills an array with rand() values, runs the
// sequential and the OpenMP scan over it, prints the wall-clock time of
// each, and prints the largest absolute difference between the two
// results (0 means they agree).
//
// Returns 0 on success, 1 if the working arrays cannot be allocated.
int main() {
  const long N = 100000000;
  const size_t bytes = static_cast<size_t>(N) * sizeof(long);

  long* A = static_cast<long*>(malloc(bytes));
  long* B0 = static_cast<long*>(malloc(bytes));
  long* B1 = static_cast<long*>(malloc(bytes));
  if (A == nullptr || B0 == nullptr || B1 == nullptr) {
    // Three arrays of N longs are requested; fail cleanly rather than
    // dereferencing a null pointer below.
    fprintf(stderr, "failed to allocate 3 arrays of %zu bytes\n", bytes);
    free(A);   // free(nullptr) is a no-op, so no per-pointer checks needed
    free(B0);
    free(B1);
    return 1;
  }

  for (long i = 0; i < N; i++) A[i] = rand();
  for (long i = 0; i < N; i++) B1[i] = 0;

  double tt = omp_get_wtime();
  scan_seq(B0, A, N);
  printf("sequential-scan = %fs\n", omp_get_wtime() - tt);

  tt = omp_get_wtime();
  scan_omp(B1, A, N);
  printf("parallel-scan = %fs\n", omp_get_wtime() - tt);

  // Largest element-wise deviation of the parallel result from the
  // sequential reference.
  long err = 0;
  for (long i = 0; i < N; i++) err = std::max(err, std::abs(B0[i] - B1[i]));
  printf("error = %ld\n", err);

  free(A);
  free(B0);
  free(B1);
  return 0;
}