Skip to content

Commit

Permalink
ROSS-Damaris Integration (#134)
Browse files Browse the repository at this point in the history
This commit adds support for the ROSS-Damaris integration for in situ analysis and vis.
  • Loading branch information
caitlinross authored Jul 25, 2018
1 parent eecc1b6 commit 26100bb
Show file tree
Hide file tree
Showing 19 changed files with 152 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "models/ROSS-Models"]
path = models/ROSS-Models
url = https://github.com/carothersc/ROSS-Models
[submodule "damaris"]
path = core/damaris
url = https://github.com/caitlinross/ROSS-damaris
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,4 @@ IF(DOXYGEN_FOUND)
ADD_SUBDIRECTORY(docs)
ENDIF(ROSS_BUILD_DOXYGEN)
ENDIF(DOXYGEN_FOUND)

8 changes: 8 additions & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ IF(USE_RIO)
SET(ross_srcs ${ross_srcs} ${RIO_SOURCE_DIR}/io.h)
ENDIF(USE_RIO)

# Optional Damaris integration (I/O and data management for in situ
# visualization/analysis). Off by default; enable with -DUSE_DAMARIS=ON.
OPTION(USE_DAMARIS "Build with Damaris library (for in situ vis/analysis)?" OFF)
IF(USE_DAMARIS)
	# Configure the damaris subdirectory before using anything it defines.
	ADD_SUBDIRECTORY(damaris)
	# NOTE(review): DAMARIS_INCLUDE and ROSS_Damaris_SOURCE_DIR are presumably
	# made visible to this scope by the damaris subdirectory -- confirm.
	INCLUDE_DIRECTORIES(${DAMARIS_INCLUDE})
	# Add the ROSS-Damaris header to the ROSS source list.
	LIST(APPEND ross_srcs ${ROSS_Damaris_SOURCE_DIR}/core/damaris.h)
ENDIF(USE_DAMARIS)

# Use debugging-friendly memory allocation
OPTION(ROSS_ALLOC_DEBUG "Use naive allocator to be more friendly to memory debugging tools" OFF)

Expand Down
1 change: 1 addition & 0 deletions core/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
#cmakedefine ROSS_runtime_checks
#cmakedefine ROSS_ALLOC_DEBUG
#cmakedefine USE_RIO
#cmakedefine USE_DAMARIS
1 change: 1 addition & 0 deletions core/damaris
Submodule damaris added at f4a173
18 changes: 18 additions & 0 deletions core/gvt/mpi_allreduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,26 @@ tw_gvt_step2(tw_pe *me)
if ((g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS) &&
g_tw_gvt_done % g_st_num_gvt == 0 && gvt <= g_tw_ts_end)
{
#ifdef USE_DAMARIS
if (g_st_damaris_enabled)
{
st_damaris_expose_data(me, gvt, GVT_COL);
st_damaris_end_iteration();
}
else
st_collect_engine_data(me, GVT_COL);
#else
st_collect_engine_data(me, GVT_COL);
#endif
}
#ifdef USE_DAMARIS
// need to make sure damaris_end_iteration is called if GVT instrumentation not turned on
//if (!g_st_stats_enabled && g_st_real_time_samp) //need to make sure if one PE enters this, all do; otherwise deadlock
if (g_st_damaris_enabled && (g_st_engine_stats == RT_STATS || g_st_engine_stats == VT_STATS))
{
st_damaris_end_iteration();
}
#endif

if ((g_st_model_stats == GVT_STATS || g_st_model_stats == ALL_STATS) && g_tw_gvt_done % g_st_num_gvt == 0)
st_collect_model_data(me, (tw_stime)tw_clock_read() / g_tw_clock_rate, GVT_STATS);
Expand Down
12 changes: 11 additions & 1 deletion core/instrumentation/ross-lps/analysis-lp.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,18 @@ void analysis_event(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp)
}

// sim engine sampling
if (g_tw_synchronization_protocol != SEQUENTIAL && (g_st_engine_stats == VT_STATS || g_st_engine_stats == ALL_STATS))
if (g_tw_synchronization_protocol != SEQUENTIAL &&
(g_st_engine_stats == VT_STATS || g_st_engine_stats == ALL_STATS))
{
#ifdef USE_DAMARIS
if (g_st_damaris_enabled)
st_damaris_expose_data(lp->pe, tw_now(lp), ANALYSIS_LP);
else
st_collect_engine_data(lp->pe, ANALYSIS_LP);
#else
st_collect_engine_data(lp->pe, ANALYSIS_LP);
#endif
}
//collect_sim_engine_data(lp->pe, lp, s, (tw_stime) tw_clock_read() / g_tw_clock_rate);

// create next sampling event
Expand Down
10 changes: 8 additions & 2 deletions core/instrumentation/st-instrumentation.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

char g_st_stats_out[INST_MAX_LENGTH] = {0};
char g_st_stats_path[4096] = {0};
int g_st_granularity = 0;
int g_st_pe_data = 1;
int g_st_kp_data = 0;
int g_st_lp_data = 0;
int g_st_disable_out = 0;

int g_st_model_stats = 0;
Expand All @@ -19,6 +21,8 @@ tw_clock g_st_rt_samp_start_cycles = 0;
tw_stime g_st_vt_interval = 1000000;
tw_stime g_st_sampling_end = 0;



static const tw_optdef inst_options[] = {
TWOPT_GROUP("ROSS Instrumentation"),
TWOPT_UINT("engine-stats", g_st_engine_stats, "Collect sim engine level stats; 0 don't collect, 1 GVT-sampling, 2 RT sampling, 3 VT sampling, 4 All sampling modes"),
Expand All @@ -27,7 +31,9 @@ static const tw_optdef inst_options[] = {
TWOPT_ULONGLONG("rt-interval", g_st_rt_interval, "real time sampling interval in ms"),
TWOPT_STIME("vt-interval", g_st_vt_interval, "Virtual time sampling interval"),
TWOPT_STIME("vt-samp-end", g_st_sampling_end, "End time for virtual time sampling (if different from g_tw_ts_end)"),
TWOPT_UINT("granularity", g_st_granularity, "for sim engine instrumentation; 0 = PE only, 1 = KP only, 2 = LP only, 3 = All levels"),
TWOPT_UINT("pe-data", g_st_pe_data, "Turn on/off collection of sim engine data at PE level"),
TWOPT_UINT("kp-data", g_st_kp_data, "Turn on/off collection of sim engine data at KP level"),
TWOPT_UINT("lp-data", g_st_lp_data, "Turn on/off collection of sim engine data at LP level"),
TWOPT_UINT("event-trace", g_st_ev_trace, "collect detailed data on all events for specified LPs; 0, no trace, 1 full trace, 2 only events causing rollbacks, 3 only committed events"),
TWOPT_CHAR("stats-prefix", g_st_stats_out, "prefix for filename(s) for stats output"),
TWOPT_CHAR("stats-path", g_st_stats_path, "path to directory to save instrumentation output"),
Expand Down
4 changes: 3 additions & 1 deletion core/instrumentation/st-instrumentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ struct sample_metadata

extern char g_st_stats_out[INST_MAX_LENGTH];
extern char g_st_stats_path[INST_MAX_LENGTH];
extern int g_st_granularity;
extern int g_st_pe_data;
extern int g_st_kp_data;
extern int g_st_lp_data;
extern int g_st_disable_out;

extern int g_st_model_stats;
Expand Down
7 changes: 3 additions & 4 deletions core/instrumentation/st-sim-engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ void st_collect_engine_data(tw_pe *pe, int col_type)
sample_md.ts = pe->GVT;
sample_md.real_time = (double)tw_clock_read() / g_tw_clock_rate;

if (g_st_granularity == GRAN_PE || g_st_granularity == GRAN_ALL)
if (g_st_pe_data)
st_collect_engine_data_pes(pe, &sample_md, &s, col_type);
if (g_st_granularity == GRAN_KP || g_st_granularity == GRAN_ALL)
if (g_st_kp_data)
{
for (i = 0; i < g_tw_nkp; i++)
{
kp = tw_getkp(i);
st_collect_engine_data_kps(pe, kp, &sample_md, &s, col_type);
}
}
if (g_st_granularity == GRAN_LP || g_st_granularity == GRAN_ALL)
if (g_st_lp_data)
{
for (i = 0; i < g_tw_nlp; i++)
{
Expand Down Expand Up @@ -144,7 +144,6 @@ void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md

void st_collect_engine_data_lps(tw_pe *pe, tw_lp *lp, sample_metadata *sample_md, tw_statistics *s, int col_type)
{
tw_kp *kp;
st_lp_stats lp_stats;
int buf_size = sizeof(*sample_md) + sizeof(lp_stats);
char buffer[buf_size];
Expand Down
16 changes: 14 additions & 2 deletions core/network-mpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,20 @@ const tw_optdef *
tw_net_init(int *argc, char ***argv)
{
int my_rank;

int initialized;
MPI_Initialized(&initialized);

if (!initialized) {
if (MPI_Init(argc, argv) != MPI_SUCCESS)
tw_error(TW_LOC, "MPI_Init failed.");
}

if (MPI_Comm_rank(MPI_COMM_ROSS, &my_rank) != MPI_SUCCESS)
tw_error(TW_LOC, "Cannot get MPI_Comm_rank(MPI_COMM_ROSS)");

g_tw_masternode = 0;
g_tw_mynode = my_rank;

return mpi_opts;
}

Expand Down Expand Up @@ -180,10 +180,22 @@ tw_net_abort(void)
/*
 * Shut down the network layer at the end of a run.
 *
 * In a USE_DAMARIS build with Damaris enabled at runtime, shutdown is
 * delegated to st_damaris_ross_finalize() and MPI_Finalize() is not called
 * from here. Otherwise MPI is finalized directly, unless a custom
 * communicator is in use (custom_communicator is non-zero).
 */
void
tw_net_stop(void)
{
#ifdef USE_DAMARIS
	if (g_st_damaris_enabled)
	{
		// NOTE(review): presumably this also takes care of MPI shutdown -- confirm
		st_damaris_ross_finalize();
		return;
	}
#endif

	// Leave MPI alone when a custom communicator is in use.
	if (custom_communicator)
		return;

	if (MPI_Finalize() != MPI_SUCCESS)
		tw_error(TW_LOC, "Failed to finalize MPI");
}

void
Expand Down
1 change: 1 addition & 0 deletions core/ross-extern.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ extern size_t tw_memory_allocate(tw_memoryq *);
#define TW_LOC __FILE__,__LINE__
extern int tw_output(tw_lp *lp, const char *fmt, ...);
extern void tw_error(const char *file, int line, const char *fmt, ...) NORETURN;
extern void tw_warning(const char *file, int line, const char *fmt, ...);
extern void tw_printf(const char *file, int line, const char *fmt, ...);
extern void tw_calloc_stats(size_t *alloc, size_t *waste);
extern void* tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n);
Expand Down
4 changes: 4 additions & 0 deletions core/ross.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ typedef uint64_t tw_lpid;

#include "instrumentation/st-instrumentation.h"

#ifdef USE_DAMARIS
#include "damaris/core/damaris.h"
#endif

#include "tw-eventq.h"

#ifdef USE_RIO
Expand Down
11 changes: 10 additions & 1 deletion core/tw-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,20 @@ static void tw_sched_batch(tw_pe * me) {
tw_clock_read() - g_st_rt_samp_start_cycles > g_st_rt_interval)
{
tw_clock current_rt = tw_clock_read();
// Real-time sampling of simulation-engine data: expose it through Damaris
// when that is compiled in and enabled at runtime, otherwise use the
// regular engine-data collection path.
if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS)
{
#ifdef USE_DAMARIS
	if (g_st_damaris_enabled)
		st_damaris_expose_data(me, me->GVT, RT_COL);
	else
#endif
		st_collect_engine_data(me, RT_COL);
}
// Real-time sampling of model-level data. This must run in every build:
// previously it sat only in the non-Damaris preprocessor branch, so a
// USE_DAMARIS build silently dropped RT model stats (and left current_rt
// unused), even when Damaris was disabled at runtime.
if (g_st_model_stats == RT_STATS || g_st_model_stats == ALL_STATS)
	st_collect_model_data(me, (tw_stime)current_rt / g_tw_clock_rate, RT_STATS);

g_st_rt_samp_start_cycles = tw_clock_read();
}

Expand Down
33 changes: 31 additions & 2 deletions core/tw-setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ void tw_init(int *argc, char ***argv) {
#endif

tw_opt_add(tw_net_init(argc, argv));

// Print out the command-line so we know what we passed in
if (tw_ismaster()) {
for (i = 0; i < *argc; i++) {
Expand All @@ -62,6 +62,9 @@ void tw_init(int *argc, char ***argv) {
tw_opt_add(tw_gvt_setup());
tw_opt_add(tw_clock_setup());
tw_opt_add(st_inst_opts());
#ifdef USE_DAMARIS
tw_opt_add(st_damaris_opts());
#endif
tw_opt_add(st_special_lp_opts());
#ifdef USE_RIO
tw_opt_add(io_opts);
Expand All @@ -70,6 +73,14 @@ void tw_init(int *argc, char ***argv) {
// by now all options must be in
tw_opt_parse(argc, argv);

#ifdef USE_DAMARIS
st_damaris_ross_init();
if (!g_st_ross_rank) // Damaris ranks only
return;
else
{
#endif

if(tw_ismaster())
{
struct stat buffer;
Expand Down Expand Up @@ -103,7 +114,9 @@ void tw_init(int *argc, char ***argv) {

tw_net_start();
tw_gvt_start();

#ifdef USE_DAMARIS
} // end of if(g_st_ross_rank)
#endif
}

static void early_sanity_check(void) {
Expand Down Expand Up @@ -279,6 +292,7 @@ void tw_define_lps(tw_lpid nlp, size_t msg_sz) {
tw_rand_init_streams(g_tw_lp[i], g_tw_nRNG_per_lp);
}
}

}

static void late_sanity_check(void) {
Expand Down Expand Up @@ -350,6 +364,10 @@ void tw_run(void) {

// init instrumentation
st_inst_init();
#ifdef USE_DAMARIS
if (g_st_damaris_enabled)
st_damaris_inst_init();
#endif

#ifdef USE_BGPM
Bgpm_Init(BGPM_MODE_SWDISTRIB);
Expand Down Expand Up @@ -419,10 +437,17 @@ void tw_run(void) {
}

/*
 * End-of-run cleanup: the master node terminates the current line of the
 * g_tw_csv output and closes the file, then the network layer is stopped.
 * In a USE_DAMARIS build the CSV step is skipped when g_st_ross_rank is
 * zero (see the "Damaris ranks only" early return in tw_init).
 */
void tw_end(void) {
#ifdef USE_DAMARIS
	// g_st_ross_rank is tested first, so tw_ismaster() is only evaluated on ROSS ranks
	if (g_st_ross_rank && tw_ismaster())
#else
	if (tw_ismaster())
#endif
	{
		fprintf(g_tw_csv, "\n");
		fclose(g_tw_csv);
	}

	// Runs whether or not the CSV step above was taken.
	tw_net_stop();
}
Expand Down Expand Up @@ -518,6 +543,10 @@ static tw_pe * setup_pes(void) {
}
printf("\n");

#ifdef USE_DAMARIS
st_damaris_init_print();
#endif

// moved these so ross.csv stays consistent
fprintf(g_tw_csv, "%d,", num_events_per_pe);
fprintf(g_tw_csv, "%d,", g_tw_gvt_threshold);
Expand Down
14 changes: 14 additions & 0 deletions core/tw-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ tw_error(const char *file, int line, const char *fmt, ...)
tw_net_abort();
}

/*
 * Print a non-fatal, printf-style warning to stdout.
 *
 * The message is prefixed with the value of g_tw_mynode and the source
 * location, followed by a newline, and stdout is flushed so the warning
 * appears immediately. Unlike tw_error(), this does not abort the run.
 *
 * file, line: source location of the caller (typically passed via TW_LOC)
 * fmt, ...:   printf-style format string and its arguments
 */
void
tw_warning(const char *file, int line, const char *fmt, ...)
{
	va_list ap;

	fprintf(stdout, "node: %ld: warning: %s:%i: ", g_tw_mynode, file, line);

	// Keep the va_list alive only around the call that consumes it.
	va_start(ap, fmt);
	vfprintf(stdout, fmt, ap);
	va_end(ap);

	fprintf(stdout, "\n");
	// One flush is enough; the original flushed stdout twice in a row.
	fflush(stdout);
}

struct mem_pool
{
struct mem_pool *next_pool;
Expand Down
6 changes: 3 additions & 3 deletions models/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ ENDFUNCTION(ROSS_TEST_SCHEDULERS)
## INSTRUMENTATION TESTS
FUNCTION(ROSS_TEST_INSTRUMENTATION target_name)
ADD_TEST(${target_name}_INST_OptPE mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_OptLP mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --granularity=3 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_OptLP mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

ADD_TEST(${target_name}_INST_RTOptPE mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_RTOptLP mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --granularity=3 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_RTOptLP mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

ADD_TEST(${target_name}_INST_ConsPE mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_ConsLP mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --granularity=3 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
ADD_TEST(${target_name}_INST_ConsLP mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

ADD_TEST(${target_name}_INST_Seq ./${target_name} --synch=1 --event-trace=1 --extramem=100000)

Expand Down
Loading

0 comments on commit 26100bb

Please sign in to comment.