forked from victoroliv2/halide-casestudies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblur_halide.cpp
129 lines (96 loc) · 2.63 KB
/
blur_halide.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include "Halide.h"

#include <sys/time.h>

#include <cassert>
#include <cfloat>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <vector>

using namespace Halide;
// Number of timed repetitions: sizes Stats::elapsed and bounds the benchmark
// loop in main().
#define NTRIES 10
// Returns the current wall-clock time in seconds as reported by
// gettimeofday(), expressed relative to the whole second observed on the
// very first call. Subtracting that origin keeps the returned values small,
// so the microsecond part is not lost when the result is stored in a double.
//
// The first call therefore returns a value in [0, 1); later calls return the
// time elapsed since the start of that first second.
double now() {
    // Origin second, captured once on the first invocation.
    static bool have_origin = false;
    static time_t origin_sec = 0;

    struct timeval current;
    gettimeofday(&current, NULL);

    if (!have_origin) {
        origin_sec = current.tv_sec;
        have_origin = true;
    }

    // The clock is expected not to step back before the origin.
    assert(current.tv_sec >= origin_sec);

    const double whole_seconds = current.tv_sec - origin_sec;
    const double fraction = current.tv_usec / 1000000.0;
    return whole_seconds + fraction;
}
// Timing record for a series of NTRIES measurements.
//   min     - smallest elapsed time recorded so far (starts at FLT_MAX)
//   max     - largest elapsed time recorded so far (starts at -FLT_MAX)
//   elapsed - per-run elapsed times; every slot starts at FLT_MAX
// The starting values are sentinels, so the first real measurement always
// replaces both min and max.
struct Stats
{
    float min;
    float max;
    float elapsed[NTRIES];

    Stats() : min(FLT_MAX), max(-FLT_MAX) {
        for (int slot = 0; slot != NTRIES; ++slot) {
            elapsed[slot] = FLT_MAX;
        }
    }
};
// Paired timing macros. Usage:
//
//     TIME_START(stats)
//         ...statements to time...
//     TIME_END(stats, index)
//
// TIME_START opens two scopes and samples now() into a local; TIME_END closes
// the inner scope (the timed statements), samples now() again, stores the
// difference in stats.elapsed[index], and lowers/raises stats.min/stats.max
// if the new measurement is smaller/larger. The two macros only form valid
// code when used together in the same enclosing scope, because TIME_START
// leaves braces open that TIME_END closes.
//
// Parameters:
//   st - an lvalue expression with `elapsed`, `min` and `max` members.
//        TIME_START accepts it only for symmetry and does not use it.
//   i  - index of the elapsed[] slot to fill; evaluated exactly once.
//
// Hygiene notes: `st` is parenthesized at every use so expressions such as
// `*ptr` work, `i` is cached so an argument with side effects is not
// re-evaluated, and the helper locals carry a `timing_` prefix so they are
// unlikely to shadow names used by the timed statements.
#define TIME_START(st)                                                  \
    {                                                                   \
        double timing_start_ = now();                                   \
        {
#define TIME_END(st, i)                                                 \
        }                                                               \
        double timing_end_ = now();                                     \
        const int timing_slot_ = (i);                                   \
                                                                        \
        (st).elapsed[timing_slot_] = timing_end_ - timing_start_;       \
        if ((st).elapsed[timing_slot_] < (st).min)                      \
            (st).min = (st).elapsed[timing_slot_];                      \
        if ((st).elapsed[timing_slot_] > (st).max)                      \
            (st).max = (st).elapsed[timing_slot_];                      \
    }
// Benchmark driver for a two-stage 3x3 box blur written with Halide.
//
// Usage: <program> <schedule>
//   <schedule> is parsed with atoi() and selects one of the schedules in the
//   switch below (0-4). Any other value applies no explicit schedule calls.
//
// The pipeline is realized NTRIES times into a fresh output image that is two
// pixels smaller than the input in both width and height, and the smallest
// measured time is printed.
//
// Returns 0 on success, 1 when the schedule argument is missing.
int main(int argc, char **argv) {
    // The schedule index is mandatory; without this check argv[1] would be a
    // null pointer and atoi() would dereference it.
    if (argc < 2) {
        fprintf(stderr, "usage: %s <schedule 0-4>\n",
                argc > 0 ? argv[0] : "blur_halide");
        return 1;
    }

    Image<float> input (2050, 2050, 1);
    struct Stats blur_time;

    Func blur_x("blur_x"), blur_y("blur_y");
    Var x("x"), y("y"), yo("yo"), xo("xo"), xi("xi"), yi("yi");

    // The algorithm: blur_x averages three horizontally adjacent input
    // samples, blur_y averages three vertically adjacent blur_x samples.
    blur_x(x, y) = (input(x, y+1) + input(x+1, y+1) + input(x+2, y+1))/3;
    blur_y(x, y) = (blur_x(x+1, y) + blur_x(x+1, y+1) + blur_x(x+1, y+2))/3;

    // The schedule: chosen at run time from the first command-line argument.
    int sched = atoi(argv[1]);
    switch(sched)
    {
    case 0:
        // Both stages scheduled with root().
        blur_x.root();
        blur_y.root();
        break;
    case 1:
        // Both stages root(), each parallel over y.
        blur_x.root().parallel(y);
        blur_y.root().parallel(y);
        break;
    case 2:
        // blur_y: y split into yo/yi with factor 4, parallel over yo,
        // vectorized by 4 in x; blur_x chunked at yo and vectorized by 4.
        blur_y.split(y, yo, yi, 4);
        blur_y.parallel(yo);
        blur_y.vectorize(x, 4);
        blur_x.chunk(yo);
        blur_x.vectorize(x, 4);
        break;
    case 3:
        // blur_y: tiled 128x32 with inner variables xi/yi, vectorized by 4
        // in xi, parallel over y; blur_x chunked at x and vectorized by 4.
        blur_y.tile(x, y, xi, yi, 128, 32);
        blur_y.vectorize(xi, 4);
        blur_y.parallel(y);
        blur_x.chunk(x);
        blur_x.vectorize(x, 4);
        break;
    case 4:
        // Only blur_y is scheduled: root(), parallel over y, vectorized by 4.
        blur_y.root().parallel(y).vectorize(x, 4);
        break;
    // Disabled alternative for case 4, kept for reference:
    // case 4:
    // blur_y.split(y, y, yi, 8).parallel(y).vectorize(x, 8);
    // blur_x.chunk(y).vectorize(x, 8);
    // break;
    }

    // Time NTRIES realizations; blur_time tracks the per-run times and the
    // running minimum/maximum.
    for(int k=0; k<NTRIES; k++)
    {
        Image<float> out (input.width()-2, input.height()-2, 1);
        TIME_START(blur_time)
        blur_y.realize(out);
        TIME_END(blur_time, k)
    }

    printf("[Halide]\n");
    printf("- BOX-BLUR: %lf \n", blur_time.min);
    return 0;
}