Skip to content

Commit

Permalink
[native] Add LinuxMemoryChecker check/warning to ensure system-mem-li…
Browse files Browse the repository at this point in the history
…mit-gb is reasonably set

Add checks and warnings to ensure that
system-memory-gb <= system-mem-limit-gb <= memory limit for process.

For cgroup v1:
Set the memory limit for the process to the smaller of
MemTotal in /proc/meminfo and memory.limit_in_bytes.

For cgroup v2:
Set the memory limit for the process to the smaller of
MemTotal in /proc/meminfo and memory.max.
If memory.max contains the string "max" (no limit set), use
MemTotal from /proc/meminfo; otherwise use the smaller of
the two values.
  • Loading branch information
minhancao committed Feb 8, 2025
1 parent d827df7 commit b52384a
Show file tree
Hide file tree
Showing 5 changed files with 389 additions and 119 deletions.
138 changes: 123 additions & 15 deletions presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sys/stat.h>
#include "presto_cpp/main/PeriodicMemoryChecker.h"
#include "presto_cpp/main/common/Configs.h"
#include "presto_cpp/main/common/Utils.h"

namespace facebook::presto {

Expand All @@ -29,13 +30,32 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
// it's mounted.
struct stat buffer;
if ((stat(kCgroupV1Path, &buffer) == 0)) {
statFile_ = kCgroupV1Path;
} else if ((stat(kCgroupV2Path, &buffer) == 0)) {
statFile_ = kCgroupV2Path;
} else {
statFile_ = "None";
PRESTO_STARTUP_LOG(INFO) << "Using cgroup v1.";
if (stat(kCgroupV1MemStatFile, &buffer) == 0) {
memStatFile_ = kCgroupV1MemStatFile;
}
if ((stat(kCgroupV1MaxMemFile, &buffer) == 0)) {
memMaxFile_ = kCgroupV1MaxMemFile;
}
}
LOG(INFO) << fmt::format("Using memory stat file {}", statFile_);

// In cgroup v2.
else {
PRESTO_STARTUP_LOG(INFO) << "Using cgroup v2.";
if (stat(kCgroupV2MemStatFile, &buffer) == 0) {
memStatFile_ = kCgroupV2MemStatFile;
}
if ((stat(kCgroupV2MaxMemFile, &buffer) == 0)) {
memMaxFile_ = kCgroupV2MaxMemFile;
}
}

PRESTO_STARTUP_LOG(INFO) << fmt::format(
"Using memory stat file: {}",
memStatFile_.empty() ? memInfoFile_ : memStatFile_);
PRESTO_STARTUP_LOG(INFO) << fmt::format(
"Using memory max file {}",
memMaxFile_.empty() ? memInfoFile_ : memMaxFile_);
}

~LinuxMemoryChecker() override {}
Expand All @@ -45,8 +65,90 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
}

void setStatFile(std::string statFile) {
statFile_ = statFile;
LOG(INFO) << fmt::format("Changed to using memory stat file {}", statFile_);
memStatFile_ = statFile;
LOG(INFO) << fmt::format(
"Changed to using memory stat file {}", memStatFile_);
}

/// Overrides the path of the file the memory limit is read from and logs
/// the new path.
void setMemMaxFile(const std::string& memMaxFile) {
  memMaxFile_ = memMaxFile;
  const auto message =
      fmt::format("Changed to using memory max file {}", memMaxFile_);
  LOG(INFO) << message;
}

/// Overrides the path of the meminfo file (defaults to /proc/meminfo) and
/// logs the new path.
void setMemInfoFile(const std::string& memInfoFile) {
  memInfoFile_ = memInfoFile;
  const auto message =
      fmt::format("Changed to using meminfo file {}", memInfoFile_);
  LOG(INFO) << message;
}

/// Validates the configured memory limits and then starts the periodic
/// memory checker.
///
/// Expected ordering:
///   system-memory-gb <= system-mem-limit-gb <= memory limit for process.
/// The upper bound is enforced with VELOX_CHECK_LE; a violation of the lower
/// bound only produces a warning.
void start() override {
  auto* systemConfig = SystemConfig::instance();
  // Widen to 64 bits before shifting. The return type of systemMemoryGb() is
  // not visible here; if it is a 32-bit integer, shifting it by 30 would
  // overflow for any value >= 2 before the result reaches the int64_t.
  const int64_t systemMemoryInBytes =
      static_cast<int64_t>(systemConfig->systemMemoryGb()) << 30;
  PRESTO_STARTUP_LOG(INFO)
      << fmt::format("System memory in bytes: {}", systemMemoryInBytes);

  PRESTO_STARTUP_LOG(INFO) << fmt::format(
      "System memory limit in bytes: {}", config_.systemMemLimitBytes);

  const auto memoryLimitForProcess = getMemoryLimitForProcess();
  PRESTO_STARTUP_LOG(INFO) << fmt::format(
      "Memory limit for process in bytes: {}", memoryLimitForProcess);

  // The configured limit must not exceed what the process can actually use.
  VELOX_CHECK_LE(
      config_.systemMemLimitBytes,
      memoryLimitForProcess,
      "system memory limit = {} bytes is higher than the memory limit for process = {} bytes.",
      config_.systemMemLimitBytes,
      memoryLimitForProcess);

  if (config_.systemMemLimitBytes < systemMemoryInBytes) {
    LOG(WARNING) << "system-mem-limit-gb is smaller than system-memory-gb. "
                 << "Expected: system-mem-limit-gb >= system-memory-gb.";
  }

  PeriodicMemoryChecker::start();
}

/// Returns the memory limit for this process, in bytes.
///
/// The result is the smaller of MemTotal read from memInfoFile_ (reported in
/// kB, converted to bytes here) and the numeric limit read from memMaxFile_.
/// The limit from memMaxFile_ is not applied when memMaxFile_ is empty or
/// when the file contains the string "max". If MemTotal could not be read,
/// the limit from memMaxFile_ is returned as is. Returns 0 when neither
/// source yields a value.
int64_t getMemoryLimitForProcess() {
  // MemTotal from meminfo, converted from kB to bytes. Only the first
  // non-zero extraction is kept; 0 means "not found".
  int64_t memTotalBytes = 0;
  folly::gen::byLine(memInfoFile_.c_str()) |
      [&](const folly::StringPiece& line) -> void {
        if (memTotalBytes != 0) {
          return;
        }
        memTotalBytes = static_cast<int64_t>(
            extractNumericConfigValueWithRegex(line, kMemTotalRegex) * 1024);
      };

  // For cgroup v1, memory.limit_in_bytes can default to a really big numeric
  // value in bytes like 9223372036854771712 to represent that
  // memory.limit_in_bytes is not set to a value. The default value here is
  // set to PAGE_COUNTER_MAX, which is LONG_MAX/PAGE_SIZE on the 64-bit
  // platform. The default value can vary based upon the platform's PAGE_SIZE.
  // Taking the minimum below means MemTotal wins in that case.

  // For cgroup v2, memory.max can contain a numeric value in bytes or string
  // "max" which represents no value has been set. If memory.max contains
  // "max", then we will use MemTotal from /proc/meminfo.
  int64_t memoryLimitForProcess = memTotalBytes;
  if (!memMaxFile_.empty()) {
    folly::gen::byLine(memMaxFile_.c_str()) |
        [&](const folly::StringPiece& line) -> void {
          // Skip blank lines (folly::to throws on empty input) and the
          // "no limit" marker.
          if (line.empty() || line == "max") {
            return;
          }
          const auto cgroupLimit = folly::to<int64_t>(line);
          // A zero running value means MemTotal was not found; do not let
          // it clamp the cgroup limit down to 0.
          memoryLimitForProcess = (memoryLimitForProcess == 0)
              ? cgroupLimit
              : std::min(memoryLimitForProcess, cgroupLimit);
        };
  }

  // Unit is in bytes.
  return memoryLimitForProcess;
}

protected:
Expand Down Expand Up @@ -80,8 +182,8 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
size_t inactiveAnon = 0;
size_t activeAnon = 0;

if (statFile_ != "None") {
folly::gen::byLine(statFile_.c_str()) |
if (!memStatFile_.empty()) {
folly::gen::byLine(memStatFile_.c_str()) |
[&](const folly::StringPiece& line) -> void {
if (inactiveAnon == 0) {
inactiveAnon =
Expand All @@ -103,7 +205,7 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
}

// Last resort use host machine info.
folly::gen::byLine("/proc/meminfo") |
folly::gen::byLine(memInfoFile_.c_str()) |
[&](const folly::StringPiece& line) -> void {
if (memAvailable == 0) {
memAvailable =
Expand Down Expand Up @@ -143,10 +245,16 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
const boost::regex kInactiveAnonRegex{R"!(inactive_anon\s*(\d+)\s*)!"};
const boost::regex kActiveAnonRegex{R"!(active_anon\s*(\d+)\s*)!"};
const boost::regex kMemAvailableRegex{R"!(MemAvailable:\s*(\d+)\s*kB)!"};
const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s*kB)!"};
const char* kCgroupV1Path = "/sys/fs/cgroup/memory/memory.stat";
const char* kCgroupV2Path = "/sys/fs/cgroup/memory.stat";
std::string statFile_;
const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s+kB)!"};
const char* kCgroupV1Path = "/sys/fs/cgroup/memory";
const char* kCgroupV1MemStatFile = "/sys/fs/cgroup/memory/memory.stat";
const char* kCgroupV2MemStatFile = "/sys/fs/cgroup/memory.stat";
const char* kCgroupV1MaxMemFile =
"/sys/fs/cgroup/memory/memory.limit_in_bytes";
const char* kCgroupV2MaxMemFile = "/sys/fs/cgroup/memory.max";
std::string memInfoFile_ = "/proc/meminfo";
std::string memStatFile_;
std::string memMaxFile_;

size_t extractNumericConfigValueWithRegex(
const folly::StringPiece& line,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ class PeriodicMemoryChecker {

/// Starts the 'PeriodicMemoryChecker'. A background scheduler will be
/// launched to perform the checks. This should only be called once.
void start();
virtual void start();

/// Stops the 'PeriodicMemoryChecker'.
void stop();
virtual void stop();

protected:
/// Returns current system memory usage. The returned value is used to compare
Expand Down
Loading

0 comments on commit b52384a

Please sign in to comment.