Skip to content

Commit

Permalink
merged from research branch; created dummy parser for community detec…
Browse files Browse the repository at this point in the history
…tion
  • Loading branch information
Aapo Kyrola committed Oct 8, 2013
2 parents b8547a4 + 4b7dc89 commit 367e5bc
Show file tree
Hide file tree
Showing 21 changed files with 1,088 additions and 128 deletions.
4 changes: 4 additions & 0 deletions example_apps/communitydetection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ vid_t & my_label(bidirectional_label & bidir, vid_t myid, vid_t nbid) {
typedef vid_t VertexDataType; // vid_t is the vertex id type
typedef bidirectional_label EdgeDataType; // Note, 8-byte edge data

/**
 * Dummy parser for bidirectional_label edge values.
 * Deliberately a no-op: the label is left untouched and the input text is ignored.
 */
void parse(bidirectional_label &x, const char * s) {
    (void) x;  // intentionally not modified
    (void) s;  // intentionally not read
}



/**
* GraphChi programs need to subclass GraphChiProgram<vertex-type, edge-type>
* class. The main logic is usually in the update function.
Expand Down
30 changes: 22 additions & 8 deletions example_apps/connectedcomponents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@

using namespace graphchi;


// Number of iterations started so far; incremented in before_iteration() and reported by main().
int iterationcount = 0;
// Whether the update function issues selective-scheduling calls; main() sets this from the "scheduler" option.
bool scheduler = false;

/**
* Type definitions. Remember to create suitable graph shards using the
Expand All @@ -67,19 +68,21 @@ typedef vid_t EdgeDataType;
*/
struct ConnectedComponentsProgram : public GraphChiProgram<VertexDataType, EdgeDataType> {

bool converged;

/**
* Vertex update function.
* On first iteration, each vertex chooses a label = the vertex id.
* On subsequent iterations, each vertex chooses the minimum of the neighbor's
* label (and itself).
*/
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
/* This program requires selective scheduling. */
assert(gcontext.scheduler != NULL);

if (scheduler) gcontext.scheduler->remove_tasks(vertex.id(), vertex.id());

if (gcontext.iteration == 0) {
vertex.set_data(vertex.id());
gcontext.scheduler->add_task(vertex.id());
if (scheduler) gcontext.scheduler->add_task(vertex.id());
}

/* On subsequent iterations, find the minimum label of my neighbors */
Expand All @@ -106,7 +109,8 @@ struct ConnectedComponentsProgram : public GraphChiProgram<VertexDataType, EdgeD
if (label < vertex.edge(i)->get_data()) {
vertex.edge(i)->set_data(label);
/* Schedule neighbor for update */
gcontext.scheduler->add_task(vertex.edge(i)->vertex_id());
if (scheduler) gcontext.scheduler->add_task(vertex.edge(i)->vertex_id(), true);
converged = false;
}
}
} else if (gcontext.iteration == 0) {
Expand All @@ -119,12 +123,18 @@ struct ConnectedComponentsProgram : public GraphChiProgram<VertexDataType, EdgeD
* Called before an iteration starts.
*/
void before_iteration(int iteration, graphchi_context &info) {
    // Keep a running count of iterations for the report printed by main().
    ++iterationcount;

    // Assume convergence for every pass after the first; update() resets the
    // flag to false as soon as it pushes a smaller label onto an edge.
    const bool past_first_pass = (iteration > 0);
    converged = past_first_pass;
}

/**
* Called after an iteration has finished.
*/
void after_iteration(int iteration, graphchi_context &ginfo) {
    // Nothing to do while labels are still changing.
    if (!converged) {
        return;
    }

    // No label changed during this pass: announce it and mark this
    // iteration as the last one on the context.
    std::cout << "Converged!" << std::endl;
    ginfo.set_last_iteration(iteration);
}

/**
Expand All @@ -145,15 +155,15 @@ int main(int argc, const char ** argv) {
/* GraphChi initialization will read the command line
arguments and the configuration file. */
graphchi_init(argc, argv);

/* Metrics object for keeping track of performance counters
and other information. Currently required. */
metrics m("connected-components");

/* Basic arguments for application */
std::string filename = get_option_string("file"); // Base filename
int niters = get_option_int("niters", 10); // Number of iterations (max)
bool scheduler = true; // Always run with scheduler
int niters = get_option_int("niters", 1000); // Number of iterations (max)
scheduler = get_option_int("scheduler", false);

/* Process input file - if not already preprocessed */
int nshards = (int) convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto"));
Expand All @@ -174,6 +184,10 @@ int main(int argc, const char ** argv) {

/* Report execution metrics */
metrics_report(m);
std::cout << "Gauss-Seidel iterations: " << iterationcount << std::endl;
FILE * logf = fopen("cc_log.txt", "a");
fprintf(logf, "async,%s,%d,%d\n", filename.c_str(), iterationcount, get_option_int("randomization", 0));
fclose(logf);
return 0;
}

176 changes: 176 additions & 0 deletions example_apps/contractionresearch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@

/**
* @file
* @author Aapo Kyrola <[email protected]>
* @version 1.0
*
* @section LICENSE
*
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @section DESCRIPTION
*
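* Label-contraction experiment, derived from the GraphChi application template.
* Every vertex starts with a randomly shuffled label and repeatedly adopts the
* minimum label among itself and its neighbors, either synchronously or in
* Gauss-Seidel fashion; the number of distinct labels left is logged.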
*/



#include <string>

#include "graphchi_basic_includes.hpp"

using namespace graphchi;

// Vertex and edge payload types handed to the GraphChi engine. The labels this
// program actually works on are kept in Contractor's in-memory vectors instead.
typedef bool VertexDataType;
typedef bool EdgeDataType;

// Handle for "contraction_log.txt"; opened in append mode, written and closed in main().
FILE * logff;

/**
 * Minimum-label propagation over an in-memory label array.
 *
 * Each vertex starts with a label drawn from a random permutation of the
 * vertex ids and, on every update, adopts the smallest label among itself and
 * its neighbors. In synchronous mode neighbors are read from a snapshot taken
 * at the start of the iteration; otherwise the live array is read, so updates
 * made earlier in the same iteration are visible ("gauss-seidel" in the logs).
 *
 * NOTE(review): the label vectors are plain shared members; confirm that the
 * engine's execution model makes unsynchronized access from update() safe.
 */
struct Contractor : public GraphChiProgram<VertexDataType, EdgeDataType> {

    std::vector<vid_t> vertex_labels;       // current label of every vertex, indexed by vertex id
    std::vector<vid_t> vertex_labels_last;  // snapshot of vertex_labels from the start of the iteration (synchronous mode only)
    bool synchronous;                       // true: read neighbors from the snapshot; false: read live labels

    /**
     * @param synchronous selects snapshot-based (true) or in-place (false) neighbor reads.
     */
    Contractor(bool synchronous) : synchronous(synchronous) {}

    /**
     * Vertex update function: replaces the vertex's label with the minimum of
     * its own label and the labels of all its neighbors.
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
        // The vertex's own label is always read from the live array.
        vid_t min_label = vertex_labels[vertex.id()];

        // Neighbor labels come from the snapshot in synchronous mode.
        std::vector<vid_t> & nbr_labels = (synchronous ? vertex_labels_last : vertex_labels);

        /* Loop over all edges (ignore direction) */
        for(int i=0; i < vertex.num_edges(); i++) {
            min_label = std::min(min_label, nbr_labels[vertex.edge(i)->vertex_id()]);
        }
        vertex_labels[vertex.id()] = min_label;
    }

    /**
     * Prints one "index = label" line per vertex to standard output.
     */
    void print() {
        for(size_t i=0; i<vertex_labels.size(); i++) {
            std::cout << i << " = " << vertex_labels[i] << std::endl;
        }
    }

    /**
     * Called before an iteration starts.
     * The first iteration creates and shuffles the initial labels; in
     * synchronous mode every iteration then snapshots the current labels.
     */
    void before_iteration(int iteration, graphchi_context &gcontext) {
        if (iteration == 0) {
            // Initialize labels: label[v] = v, then permute.
            vertex_labels.resize(gcontext.nvertices);
            for(vid_t v=0; v<gcontext.nvertices; v++) {
                vertex_labels[v] = v;
            }

            // Shuffle
            // NOTE(review): std::random_shuffle was deprecated in C++14 and
            // removed in C++17. It is kept here because main() seeds via
            // srand(), which presumably drives this shuffle; switching to
            // std::shuffle would change how the permutation is seeded.
            std::cout << "Shuffling labels" << std::endl;
            std::random_shuffle(vertex_labels.begin(), vertex_labels.end());

            if (get_option_int("print", 0) == 1) {
                std::cout << "Initial: " << std::endl;
                print();
            }
        }

        // Taken on every iteration, including the first, so a separate
        // snapshot inside the initialization branch is not needed.
        if (synchronous) {
            vertex_labels_last = vertex_labels;
        }
    }

    /**
     * Called after an iteration has finished. Nothing to do.
     */
    void after_iteration(int iteration, graphchi_context &gcontext) {

    }

    /**
     * Counts the distinct labels currently assigned.
     * Works on a sorted copy, so vertex_labels itself is not reordered.
     *
     * @return number of distinct values in vertex_labels; 0 when no labels
     *         have been created yet (the vector is empty).
     */
    size_t unique_labels() {
        std::vector<vid_t> labs = vertex_labels;
        std::sort(labs.begin(), labs.end());
        // std::unique compacts runs of equal neighbors to the front and
        // returns the end of the de-duplicated range; an empty input yields 0.
        return static_cast<size_t>(std::unique(labs.begin(), labs.end()) - labs.begin());
    }

};

/**
 * Runs the contraction experiment.
 *
 * Reads the options "file", "niters", "scheduler", "nshards", "sync", "print"
 * and "randomization", runs the Contractor program on the graph, then appends
 * one CSV line to contraction_log.txt and prints a similar line to stdout.
 *
 * @return 0 on completion. Failure to open the log file is reported on stderr
 *         and only disables the log line; the run itself still proceeds.
 */
int main(int argc, const char ** argv) {
    /* GraphChi initialization will read the command line
       arguments and the configuration file. */
    graphchi_init(argc, argv);
    global_logger().set_log_level(LOG_ERROR);

    // Seed rand() from the microsecond part of the current time.
    timeval t;
    gettimeofday(&t, NULL);
    srand(t.tv_usec);

    /* Metrics object for keeping track of performance counters
       and other information. Currently required. */
    metrics m("contraction-research");

    /* Basic arguments for application */
    std::string filename = get_option_string("file");   // Base filename
    int niters = get_option_int("niters", 1);           // Number of iterations
    bool scheduler = get_option_int("scheduler", 0);    // Whether to use selective scheduling

    /* Detect the number of shards or preprocess an input to create them */
    int nshards = convert_if_notexists<EdgeDataType>(filename,
                                                     get_option_string("nshards", "auto"));

    // Logging is best-effort: a missing log file must not crash the experiment.
    logff = fopen("contraction_log.txt", "a");
    if (logff == NULL) {
        perror("contraction_log.txt");
    }

    Contractor program(get_option_int("sync"));

    /* Run */
    graphchi_engine<VertexDataType, EdgeDataType> engine(filename, nshards, scheduler, m);
    engine.set_modifies_inedges(false);
    engine.set_modifies_outedges(false);
    engine.set_disable_vertexdata_storage();
    engine.run(program, niters);

    if (get_option_int("print", 0) == 1) {
        std::cout << "After: " << std::endl;

        program.print();
    }

    // Compute every reported value once: unique_labels() copies and sorts the
    // whole label vector, so it should not be re-evaluated per output field.
    const char * mode = program.synchronous ? "synchronous" : "gauss-seidel";
    const char * schedule = get_option_int("randomization", 0) ? "random-schedule" : "nonrandom-schedule";
    // Explicit casts keep the arguments in step with the %lu specifiers
    // regardless of the integer types the callees return.
    const unsigned long nvertices = static_cast<unsigned long>(engine.num_vertices());
    const unsigned long nlabels = static_cast<unsigned long>(program.unique_labels());
    // Fraction of vertices merged away; guarded so an empty graph gives 0.
    const double contracted = (nvertices > 0) ? double(nvertices - nlabels) / nvertices : 0.0;

    if (logff != NULL) {
        fprintf(logff, "%s,random-initlabels,%s,%s,%d,%lu,%lu,%lf\n", filename.c_str(), mode,
                schedule, niters, nvertices, nlabels, contracted);
        fclose(logff);
        logff = NULL;
    }
    printf("%s,%s,%s,%d,%lu,%lu,%lf\n", filename.c_str(), mode,
           schedule, niters, nvertices, nlabels, contracted);

    return 0;
}
Loading

0 comments on commit 367e5bc

Please sign in to comment.