Skip to content

Commit

Permalink
support for partial variable recovery on restart
Browse files Browse the repository at this point in the history
  • Loading branch information
bnicolae committed Mar 2, 2019
1 parent d7fe9be commit 949c05b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 13 deletions.
2 changes: 2 additions & 0 deletions include/veloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#define VELOC_MAX_NAME (1024)

/* Recovery modes accepted as the `mode` argument of the memory-recovery calls. */
/* Restore every region stored in the checkpoint; no region is skipped based on the id set. */
#define VELOC_RECOVER_ALL (0)
/* Restore only the regions whose id appears in the caller-supplied id set; all others are skipped. */
#define VELOC_RECOVER_SOME (1)
/* Restore the regions whose id does NOT appear in the caller-supplied id set; the listed ones are skipped. */
#define VELOC_RECOVER_REST (2)

#ifdef __cplusplus
extern "C" {
Expand Down
30 changes: 17 additions & 13 deletions src/lib/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,9 @@ bool veloc_client_t::restart_begin(const char *name, int version) {
}

bool veloc_client_t::recover_mem(int mode, std::set<int> &ids) {
if (mode != VELOC_RECOVER_ALL) {
ERROR("only VELOC_RECOVER_ALL mode currently supported");
return false;
}

std::ifstream f;
std::map<int, size_t> region_info;

f.exceptions(std::ifstream::failbit | std::ifstream::badbit);
try {
f.open(current_ckpt.filename(cfg.get("scratch")), std::ifstream::in | std::ifstream::binary);
Expand All @@ -192,19 +189,26 @@ bool veloc_client_t::recover_mem(int mode, std::set<int> &ids) {
for (unsigned int i = 0; i < no_regions; i++) {
f.read((char *)&id, sizeof(int));
f.read((char *)&region_size, sizeof(size_t));
if (mem_regions.find(id) == mem_regions.end()) {
ERROR("protected memory region " << id << " does not exist");
region_info.insert(std::make_pair(id, region_size));
}
for (auto &e : region_info) {
bool found = ids.find(e.first) != ids.end();
if ((mode == VELOC_RECOVER_SOME && !found) || (mode == VELOC_RECOVER_REST && found)) {
f.seekg(e.second, std::ifstream::cur);
continue;
}
if (mem_regions.find(e.first) == mem_regions.end()) {
ERROR("no protected memory region defined for id " << e.first);
return false;
}
if (mem_regions[id].second < region_size) {
ERROR("protected memory region " << id << " is too small ("
<< mem_regions[id].second << ") to hold required size ("
<< region_size << ")");
if (mem_regions[e.first].second < e.second) {
ERROR("protected memory region " << e.first << " is too small ("
<< mem_regions[e.first].second << ") to hold required size ("
<< e.second << ")");
return false;
}
f.read((char *)mem_regions[e.first].first, e.second);
}
for (auto &e : mem_regions)
f.read((char *)e.second.first, e.second.second);
} catch (std::ifstream::failure &e) {
ERROR("cannot read checkpoint file " << current_ckpt << ", reason: " << e.what());
return false;
Expand Down
7 changes: 7 additions & 0 deletions src/lib/veloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ extern "C" int VELOC_Recover_mem() {
return CLIENT_CALL(veloc_client->recover_mem(VELOC_RECOVER_ALL, ids));
}

/**
 * C entry point for selective memory recovery.
 *
 * Collects the first `no_ids` entries of `ids` into a std::set<int> and
 * forwards it, together with `mode`, to veloc_client->recover_mem() through
 * the CLIENT_CALL macro.
 *
 * @param mode    recovery mode, passed through unchanged (VELOC_RECOVER_ALL,
 *                VELOC_RECOVER_SOME or VELOC_RECOVER_REST).
 * @param ids     array of region ids; it is dereferenced without a NULL
 *                check whenever no_ids > 0.
 * @param no_ids  number of entries to read from `ids`; a value <= 0 yields an
 *                empty set.
 * @return whatever CLIENT_CALL evaluates to (macro defined elsewhere in this
 *         file).
 */
extern "C" int VELOC_Recover_selective(int mode, int *ids, int no_ids) {
    std::set<int> id_set;
    // Only touch the caller's array when there is at least one entry to read.
    if (no_ids > 0) {
        const int *const stop = ids + no_ids;
        for (const int *cursor = ids; cursor != stop; ++cursor) {
            id_set.insert(*cursor);
        }
    }
    return CLIENT_CALL(veloc_client->recover_mem(mode, id_set));
}

/**
 * C entry point that finishes a restart.
 *
 * Forwards `success` unchanged to veloc_client->restart_end() through the
 * CLIENT_CALL macro and returns whatever that macro evaluates to (the macro
 * is defined elsewhere in this file).
 *
 * @param success  caller-provided status flag handed to restart_end().
 */
extern "C" int VELOC_Restart_end(int success) {
return CLIENT_CALL(veloc_client->restart_end(success));
}
Expand Down

0 comments on commit 949c05b

Please sign in to comment.