Skip to content

Commit

Permalink
added inmemory-version of connected components
Browse files Browse the repository at this point in the history
  • Loading branch information
Aapo Kyrola committed Nov 10, 2012
1 parent dd38698 commit 016caa9
Showing 1 changed file with 176 additions and 0 deletions.
176 changes: 176 additions & 0 deletions example_apps/inmemconncomps.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@

/**
* @file
* @author Aapo Kyrola <[email protected]>
* @version 1.0
*
* @section LICENSE
*
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @section DESCRIPTION
*
* Application for computing the connected components of a graph.
* The algorithm is simple: on first iteration each vertex sends its
* id to neighboring vertices. On subsequent iterations, each vertex chooses
* the smallest id of its neighbors and broadcasts its (new) label to
* its neighbors. The algorithm terminates when no vertex changes label.
*
* @section REMARKS
*
* Version of connected components that keeps the vertex values
* in memory.
* @author Aapo Kyrola
*/


#include <cmath>
#include <string>

#include "graphchi_basic_includes.hpp"
#include "util/labelanalysis.hpp"

using namespace graphchi;



/**
* Type definitions. Remember to create suitable graph shards using the
* Sharder-program.
*/
typedef vid_t VertexDataType; // vid_t is the vertex id type
typedef vid_t EdgeDataType;

/**
* GraphChi programs need to subclass GraphChiProgram<vertex-type, edge-type>
* class. The main logic is usually in the update function.
*/
struct ConnectedComponentsProgram : public GraphChiProgram<VertexDataType, EdgeDataType> {

VertexDataType * vertex_values;

vid_t neighbor_value(graphchi_edge<EdgeDataType> * edge) {
return vertex_values[edge->vertex_id()];
}

void set_data(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, vid_t value) {
vertex_values[vertex.id()] = value;
vertex.set_data(value);
}

/**
* Vertex update function.
* On first iteration ,each vertex chooses a label = the vertex id.
* On subsequent iterations, each vertex chooses the minimum of the neighbor's
* label (and itself).
*/
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
/* This program requires selective scheduling. */
assert(gcontext.scheduler != NULL);

/* On subsequent iterations, find the minimum label of my neighbors */
vid_t curmin = vertex.get_data();
for(int i=0; i < vertex.num_edges(); i++) {
vid_t nblabel = neighbor_value(vertex.edge(i));
curmin = std::min(nblabel, curmin);
}

/* If my label changes, schedule neighbors */
if (vertex.get_data() != curmin) {
vid_t newlabel = curmin;

for(int i=0; i < vertex.num_edges(); i++) {
if (newlabel < neighbor_value(vertex.edge(i))) {
/* Schedule neighbor for update */
gcontext.scheduler->add_task(vertex.edge(i)->vertex_id());
}
}
}
set_data(vertex, curmin);
}

/**
* Called before an iteration starts.
*/
void before_iteration(int iteration, graphchi_context &ctx) {
if (iteration == 0) {
/* initialize each vertex with its own lable */
vertex_values = new VertexDataType[ctx.nvertices];
for(int i=0; i < (int)ctx.nvertices; i++) {
vertex_values[i] = i;
}
}
}

/**
* Called after an iteration has finished.
*/
void after_iteration(int iteration, graphchi_context &ginfo) {
}

/**
* Called before an execution interval is started.
*/
void before_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &ginfo) {
}

/**
* Called after an execution interval has finished.
*/
void after_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &ginfo) {
}

};

int main(int argc, const char ** argv) {
/* GraphChi initialization will read the command line
arguments and the configuration file. */
graphchi_init(argc, argv);

/* Metrics object for keeping track of performance counters
and other information. Currently required. */
metrics m("connected-components-inmem");

/* Basic arguments for application */
std::string filename = get_option_string("file"); // Base filename
int niters = get_option_int("niters", 10); // Number of iterations (max)
bool scheduler = true; // Always run with scheduler

/* Process input file - if not already preprocessed */
int nshards = (int) convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto"));

if (get_option_int("onlyresult", 0) == 0) {
/* Run */
ConnectedComponentsProgram program;
graphchi_engine<VertexDataType, EdgeDataType> engine(filename, nshards, scheduler, m);
engine.set_modifies_inedges(false); // Improves I/O performance.
engine.set_modifies_outedges(false); // Improves I/O performance.

engine.run(program, niters);
}

/* Run analysis of the connected components (output is written to a file) */
m.start_time("label-analysis");

analyze_labels<vid_t>(filename);

m.stop_time("label-analysis");

/* Report execution metrics */
metrics_report(m);
return 0;
}

0 comments on commit 016caa9

Please sign in to comment.