forked from GraphChi/graphchi-cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpagerank.cpp
189 lines (157 loc) · 6.48 KB
/
pagerank.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/**
* @file
* @author Aapo Kyrola <[email protected]>
* @version 1.0
*
* @section LICENSE
*
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @section DESCRIPTION
*
* Simple pagerank implementation. Uses the basic vertex-based API for
* demonstration purposes. A faster implementation uses the functional API,
* "pagerank_functional".
*/
#include <string>
#include <fstream>
#include <cmath>
#define GRAPHCHI_DISABLE_COMPRESSION
#include "graphchi_basic_includes.hpp"
#include "util/toplist.hpp"
using namespace graphchi;
#define THRESHOLD 1e-1
#define RANDOMRESETPROB 0.15
typedef float VertexDataType;
typedef float EdgeDataType;
struct PagerankProgram : public GraphChiProgram<VertexDataType, EdgeDataType> {
/**
* Called before an iteration starts. Not implemented.
*/
void before_iteration(int iteration, graphchi_context &info) {
}
/**
* Called after an iteration has finished. Not implemented.
*/
void after_iteration(int iteration, graphchi_context &ginfo) {
}
/**
* Called before an execution interval is started. Not implemented.
*/
void before_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &ginfo) {
}
/**
* Pagerank update function.
*/
void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
float sum=0;
if (ginfo.iteration == 0) {
/* On first iteration, initialize vertex and out-edges.
The initialization is important,
because on every run, GraphChi will modify the data in the edges on disk.
*/
for(int i=0; i < v.num_outedges(); i++) {
graphchi_edge<float> * edge = v.outedge(i);
edge->set_data(1.0 / v.num_outedges());
}
v.set_data(RANDOMRESETPROB);
} else {
/* Compute the sum of neighbors' weighted pageranks by
reading from the in-edges. */
for(int i=0; i < v.num_inedges(); i++) {
float val = v.inedge(i)->get_data();
sum += val;
}
/* Compute my pagerank */
float pagerank = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum;
/* Write my pagerank divided by the number of out-edges to
each of my out-edges. */
if (v.num_outedges() > 0) {
float pagerankcont = pagerank / v.num_outedges();
for(int i=0; i < v.num_outedges(); i++) {
graphchi_edge<float> * edge = v.outedge(i);
edge->set_data(pagerankcont);
}
}
/* Keep track of the progression of the computation.
GraphChi engine writes a file filename.deltalog. */
ginfo.log_change(std::abs(pagerank - v.get_data()));
/* Set my new pagerank as the vertex value */
v.set_data(pagerank);
}
}
};
/**
* Faster version of pagerank which holds vertices in memory. Used only if the number
* of vertices is small enough.
*/
struct PagerankProgramInmem : public GraphChiProgram<VertexDataType, EdgeDataType> {
std::vector<EdgeDataType> pr;
PagerankProgramInmem(int nvertices) : pr(nvertices, RANDOMRESETPROB) {}
void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
if (ginfo.iteration > 0) {
float sum=0;
for(int i=0; i < v.num_inedges(); i++) {
sum += pr[v.inedge(i)->vertexid];
}
if (v.outc > 0) {
pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum) / v.outc;
} else {
pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum);
}
} else if (ginfo.iteration == 0) {
if (v.outc > 0) pr[v.id()] = 1.0f / v.outc;
}
if (ginfo.iteration == ginfo.num_iterations - 1) {
/* On last iteration, multiply pr by degree and store the result */
v.set_data(v.outc > 0 ? pr[v.id()] * v.outc : pr[v.id()]);
}
}
};
int main(int argc, const char ** argv) {
graphchi_init(argc, argv);
metrics m("pagerank");
global_logger().set_log_level(LOG_DEBUG);
/* Parameters */
std::string filename = get_option_string("file"); // Base filename
int niters = get_option_int("niters", 4);
bool scheduler = false; // Non-dynamic version of pagerank.
int ntop = get_option_int("top", 20);
/* Process input file - if not already preprocessed */
int nshards = convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto"));
/* Run */
graphchi_engine<float, float> engine(filename, nshards, scheduler, m);
engine.set_modifies_inedges(false); // Improves I/O performance.
bool inmemmode = engine.num_vertices() * sizeof(EdgeDataType) < (size_t)engine.get_membudget_mb() * 1024L * 1024L;
if (inmemmode) {
logstream(LOG_INFO) << "Running Pagerank by holding vertices in-memory mode!" << std::endl;
engine.set_modifies_outedges(false);
engine.set_disable_outedges(true);
engine.set_only_adjacency(true);
PagerankProgramInmem program(engine.num_vertices());
engine.run(program, niters);
} else {
PagerankProgram program;
engine.run(program, niters);
}
/* Output top ranked vertices */
std::vector< vertex_value<float> > top = get_top_vertices<float>(filename, ntop);
std::cout << "Print top " << ntop << " vertices:" << std::endl;
for(int i=0; i < (int)top.size(); i++) {
std::cout << (i+1) << ". " << top[i].vertex << "\t" << top[i].value << std::endl;
}
metrics_report(m);
return 0;
}