// g++ -std=c++17 -O2 -Wall -o cpudev_setperms cpudev_setperms.cpp
#include <errno.h>
#include <fcntl.h>
#include <ftw.h>
#include <grp.h>
#include <limits.h>
#include <pwd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <syslog.h>
#include <unistd.h>

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
// Account names that main() lets through even when no exclusive SLURM job is found.
static const std::vector<std::string> g_local_users = {"administrator", "powerstate", "prober"};
// Directory prefix of the SLURM client tools; "squeue"/"scontrol" are appended
// directly, so the trailing '/' is required.
static const std::string g_slurm_path = "/opt/slurm/bin/";
// Root of the directory tree whose group and permissions are rewritten.
static const std::string g_cpudev_path = "/sys/devices/system/cpu/";
// Name of the group that is given ownership of every entry under g_cpudev_path.
static const std::string g_cpudev_group = "cpudev";
// ================ Get current username =======================================
// Return the account name of the person who started this program.
//
// SUDO_USER is honoured only when the real UID is root. When sudo runs a
// command as root it sets the real UID to 0 and exports SUDO_USER; if the
// real UID is anything else the process was not started that way (for
// example a setuid installation), the environment belongs to the caller, and
// believing SUDO_USER would let anybody impersonate a whitelisted account.
//
// Falls back to the passwd entry of the real UID, and finally to "unknown".
std::string get_sudo_invoker() {
    const uid_t real_uid = getuid();

    if (real_uid == 0) {
        const char* sudo_user = getenv("SUDO_USER");
        if (sudo_user && *sudo_user) {
            return std::string(sudo_user);
        }
    }

    // Identify the caller through the password database instead.
    const struct passwd* pw = getpwuid(real_uid);
    if (pw && pw->pw_name) {
        return std::string(pw->pw_name);
    }

    // Final fallback: no usable identity could be determined.
    return "unknown";
}
// ================ Run external binary safely and capture its stdout ==========
// Execute `prog` directly (no shell) with `args` and return everything it
// wrote to stdout, with trailing '\n' / '\r' characters removed.
//
//   prog  path handed to execv(); it is also used as argv[0]
//   args  remaining argv entries, passed verbatim
//
// Returns "" when the pipe or fork cannot be created or the child produced
// no output. A non-zero exit status or a terminating signal is logged to
// syslog, but whatever output was captured is still returned. The child's
// stdin and stderr are inherited from this process.
std::string run_program_capture(const std::string &prog, const std::vector<std::string> &args) {
    // Assemble argv before forking so the child does nothing but redirect and exec.
    std::vector<char*> argv;
    argv.reserve(args.size() + 2);
    argv.push_back(const_cast<char*>(prog.c_str()));
    for (const auto &a : args) {
        argv.push_back(const_cast<char*>(a.c_str()));
    }
    argv.push_back(nullptr);

    int pipefd[2];
    if (pipe(pipefd) == -1) {
        syslog(LOG_ERR, "pipe() failed: %s", strerror(errno));
        return "";
    }

    const pid_t pid = fork();
    if (pid < 0) {
        syslog(LOG_ERR, "fork() failed: %s", strerror(errno));
        close(pipefd[0]);
        close(pipefd[1]);
        return "";
    }

    if (pid == 0) {
        // child: without a working redirection the output would go to our
        // own stdout instead of the pipe, so give up rather than exec.
        if (dup2(pipefd[1], STDOUT_FILENO) == -1) {
            _exit(127);
        }
        close(pipefd[0]);
        close(pipefd[1]);
        execv(prog.c_str(), argv.data());
        _exit(127);  // exec failed
    }

    // parent
    close(pipefd[1]);
    std::string out;
    char buf[512];
    for (;;) {
        const ssize_t n = read(pipefd[0], buf, sizeof(buf));
        if (n > 0) {
            out.append(buf, static_cast<size_t>(n));
            continue;
        }
        if (n == 0) {
            break;  // EOF: child closed its end
        }
        if (errno == EINTR) {
            continue;  // interrupted by a signal, not a real error
        }
        syslog(LOG_ERR, "read() from %s failed: %s", prog.c_str(), strerror(errno));
        break;
    }
    close(pipefd[0]);

    int status = 0;
    pid_t reaped;
    do {
        reaped = waitpid(pid, &status, 0);
    } while (reaped == -1 && errno == EINTR);

    if (reaped == -1) {
        // status is meaningless here, so do not interpret it
        syslog(LOG_ERR, "waitpid() for %s failed: %s", prog.c_str(), strerror(errno));
    } else if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
        syslog(LOG_WARNING, "Program %s exited with status %d", prog.c_str(), WEXITSTATUS(status));
    } else if (WIFSIGNALED(status)) {
        syslog(LOG_WARNING, "Program %s terminated by signal %d", prog.c_str(), WTERMSIG(status));
    }

    while (!out.empty() && (out.back() == '\n' || out.back() == '\r')) {
        out.pop_back();
    }
    return out;
}
// ================ get hostname (safe) ========================================
// Return this machine's host name as reported by gethostname(), or "" when
// the call fails.
std::string get_hostname() {
    // POSIX leaves it unspecified whether a truncated name is NUL-terminated.
    // Zero-filling the buffer and holding back its last byte guarantees a
    // terminator no matter what the C library does.
    char host[256] = {0};
    if (gethostname(host, sizeof(host) - 1) != 0) {
        return "";
    }
    return std::string(host);
}
// ================ get Slurm JobID (first token) ==============================
// Ask squeue for the jobs `user` has on this host and return the first
// whitespace-delimited token of the first output line, provided it consists
// only of digits. Returns "" when the host name is unavailable, squeue
// prints nothing, or that token is not purely numeric.
std::string get_slurm_job_id(const std::string &user) {
    const std::string node = get_hostname();
    if (node.empty()) {
        return "";
    }

    std::vector<std::string> squeue_args;
    squeue_args.push_back("--noheader");
    squeue_args.push_back("--nodelist=" + node);
    squeue_args.push_back("--user=" + user);
    squeue_args.push_back("--Format=JobID");

    const std::string listing = run_program_capture(g_slurm_path + "squeue", squeue_args);
    if (listing.empty()) {
        return "";
    }

    // Only the first line is considered (substr clamps npos to the string end).
    const std::string head = listing.substr(0, listing.find('\n'));

    std::istringstream words(head);
    std::string candidate;
    words >> candidate;
    if (words.fail()) {
        return "";
    }

    const std::regex digits_only("^\\d+$");
    return std::regex_match(candidate, digits_only) ? candidate : std::string();
}
// ================ parse "OverSubscribe=" value from scontrol output ===========
// Run "scontrol show job <job_id>" and return the value of its
// "OverSubscribe=" field (e.g. "NO"), or "" when the field cannot be
// determined reliably.
//
// The scontrol output also contains free-form, user-supplied text (the job
// name, for instance, is printed before the OverSubscribe field). Taking the
// first substring match would therefore let a user smuggle in
// "OverSubscribe=NO" through such a field. To stay fail-closed:
//   * a match counts only if it starts a token, i.e. sits at the very
//     beginning of the output or directly after whitespace;
//   * if several such matches carry different values the output is treated
//     as untrustworthy and "" is returned.
std::string get_over_subscribe_flag(const std::string &job_id) {
    const std::string prog = g_slurm_path + "scontrol";
    const std::vector<std::string> args = {"show", "job", job_id};
    const std::string out = run_program_capture(prog, args);
    if (out.empty()) return "";

    const std::string key = "OverSubscribe=";
    std::string value;
    bool seen = false;
    size_t from = 0;

    for (;;) {
        const size_t hit = out.find(key, from);
        if (hit == std::string::npos) break;
        from = hit + key.size();

        // Ignore occurrences embedded in another field's value (e.g. "JobName=OverSubscribe=NO").
        const bool starts_token =
            (hit == 0) || isspace(static_cast<unsigned char>(out[hit - 1]));
        if (!starts_token) continue;

        size_t end = from;
        while (end < out.size() && !isspace(static_cast<unsigned char>(out[end]))) ++end;
        const std::string candidate = out.substr(from, end - from);

        if (!seen) {
            value = candidate;
            seen = true;
        } else if (candidate != value) {
            syslog(LOG_WARNING,
                   "Conflicting OverSubscribe fields in scontrol output for job %s; ignoring them",
                   job_id.c_str());
            return "";
        }
    }
    return value;
}
// ================ Check if user is local (from hard-coded list) =============
// Report whether `user` matches, exactly and case-sensitively, one of the
// names in g_local_users.
bool is_local_user(const std::string &user) {
    bool listed = false;
    for (size_t i = 0; i < g_local_users.size() && !listed; ++i) {
        listed = (g_local_users[i] == user);
    }
    return listed;
}
// ================ Resolve group name to gid =================================
// Translate a group name into its numeric id via getgrnam().
//
//   groupname  name to look up
//   out_gid    receives the gid on success; left untouched on failure
//
// Returns true when the group exists; otherwise logs to syslog and returns false.
bool lookup_gid(const std::string &groupname, gid_t &out_gid) {
    const struct group *entry = getgrnam(groupname.c_str());
    if (entry != nullptr) {
        out_gid = entry->gr_gid;
        return true;
    }
    syslog(LOG_ERR, "getgrnam('%s') failed", groupname.c_str());
    return false;
}
// Global state for nftw callback
// (the nftw() callback signature has no user-data argument, so its inputs and
// its error tally are kept at file scope)
static gid_t g_cpudev_gid = static_cast<gid_t>(-1);  // group applied to every visited entry
static bool g_do_chgrp = true;                       // when true, change the group of each entry
static bool g_do_chmod_g_eq_u = true;                // when true, copy owner rwx bits to group
static int g_change_errors = 0;                      // entries that could not be changed

// Record one failed change. Only the first five failures are logged in full
// and the sixth produces a single "suppressed" notice, to keep syslog readable.
static void note_change_error(const char *op, const char *path, const char *why) {
    ++g_change_errors;
    if (g_change_errors <= 5) {
        syslog(LOG_WARNING, "%s failed on %s: %s", op, path, why);
    } else if (g_change_errors == 6) {
        syslog(LOG_WARNING, "Further %s failures suppressed (many)", op);
    }
}

// ================ nftw callback ==============================================
// Called by nftw() for each entry of the walked tree. Sets the entry's group
// to g_cpudev_gid (keeping its owner) and makes the group permission bits
// equal to the owner bits, subject to g_do_chgrp / g_do_chmod_g_eq_u.
//
//   fpath     path of the entry
//   sb        stat information supplied by nftw() (not valid for FTW_NS)
//   typeflag  entry classification supplied by nftw()
//
// Always returns 0 so that the walk continues; failures are counted in
// g_change_errors instead of aborting.
int nftw_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW * /*ftwbuf*/) {
    // FTW_PHYS only stops nftw() from descending through symlinks; the links
    // themselves are still reported. chown() and chmod() dereference their
    // path, so touching such an entry would modify the link's target, which
    // may lie outside the tree being processed. Leave symlinks alone.
    if (typeflag == FTW_SL || typeflag == FTW_SLN) {
        return 0;
    }

    // stat() failed for this entry: *sb carries no usable data.
    if (typeflag == FTW_NS) {
        note_change_error("stat", fpath, "no file status available");
        return 0;
    }

    // Change group preserving owner
    if (g_do_chgrp && chown(fpath, sb->st_uid, g_cpudev_gid) != 0) {
        note_change_error("chown", fpath, strerror(errno));
    }

    // Set group bits equal to owner bits (g = u)
    if (g_do_chmod_g_eq_u) {
        // Work on permission bits only; st_mode also carries the file type.
        const mode_t perm_mask = S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO;
        const mode_t perms = sb->st_mode & perm_mask;
        const mode_t owner_as_group = static_cast<mode_t>((perms & S_IRWXU) >> 3);
        const mode_t wanted =
            static_cast<mode_t>((perms & ~static_cast<mode_t>(S_IRWXG)) | owner_as_group);
        if (wanted != perms && chmod(fpath, wanted) != 0) {
            note_change_error("chmod", fpath, strerror(errno));
        }
    }
    return 0;  // continue
}
// ================ Recursively operate on g_cpudev_path safely ===============
// Resolve g_cpudev_group to a gid and walk g_cpudev_path with nftw(), letting
// nftw_callback adjust every entry.
//
// Returns true only if the group exists, the walk itself succeeded and no
// individual change failed. Problems are reported through syslog; a non-zero
// error tally is additionally written to std::clog.
bool apply_cpudev_changes() {
    const bool have_group = lookup_gid(g_cpudev_group, g_cpudev_gid);
    if (!have_group) {
        syslog(LOG_ERR, "Group '%s' not found", g_cpudev_group.c_str());
        return false;
    }

    g_change_errors = 0;

    // Let nftw() keep at most 20 directories open at once.
    // FTW_PHYS keeps the walk from descending through symlinks.
    const int max_open_fds = 20;
    const int walk_rc = nftw(g_cpudev_path.c_str(), nftw_callback, max_open_fds, FTW_PHYS);
    if (walk_rc != 0) {
        syslog(LOG_ERR, "nftw failed on %s: %s", g_cpudev_path.c_str(), strerror(errno));
        return false;
    }

    if (g_change_errors == 0) {
        syslog(LOG_INFO, "Successfully updated ownership and permissions under %s", g_cpudev_path.c_str());
        return true;
    }

    syslog(LOG_WARNING, "Completed with %d change errors under %s", g_change_errors, g_cpudev_path.c_str());
    std::clog << "(WW) Completed with " << g_change_errors
              << " change errors under " << g_cpudev_path << std::endl;
    return false;
}
// ================ main =======================================================
// Entry point. Identifies the invoking user, looks up that user's SLURM job
// on this node and its OverSubscribe setting, and applies the ownership and
// permission changes only if OverSubscribe is "NO" or the user is in the
// local user list.
//
// Exit codes: 0 = changes applied, 3 = applying the changes failed,
//             4 = caller is not entitled to run the tool.
int main() {
    // open syslog
    openlog("cpudev_setperms", LOG_PID | LOG_CONS, LOG_DAEMON);
    syslog(LOG_INFO, "Program start");

    const std::string user = get_sudo_invoker();
    syslog(LOG_INFO, "Invoked by user: %s", user.c_str());

    // The OverSubscribe value stays empty when the user has no job on this node.
    std::string over_sub;
    const std::string job_id = get_slurm_job_id(user);
    if (job_id.empty()) {
        syslog(LOG_INFO, "No SLURM JobID found for user %s on this node", user.c_str());
    } else {
        syslog(LOG_INFO, "Found SLURM JobID %s for user %s", job_id.c_str(), user.c_str());
        over_sub = get_over_subscribe_flag(job_id);
        syslog(LOG_INFO, "OverSubscribe for job %s = '%s'", job_id.c_str(), over_sub.c_str());
    }

    const bool local = is_local_user(user);
    if (local) {
        syslog(LOG_INFO, "User %s is in local user list", user.c_str());
    }

    // Refuse unless the job is not over-subscribable or the user is whitelisted.
    const bool exclusive = (over_sub == "NO");
    if (!exclusive && !local) {
        syslog(LOG_WARNING, "Node is NOT exclusively allocated and user is not local: aborting for user %s", user.c_str());
        std::cerr << "(EE) This node is NOT exclusively allocated via SLURM, you cannot run this program.\n";
        closelog();
        return 4;
    }

    syslog(LOG_INFO, "Proceeding to change ownership/permissions for %s", g_cpudev_path.c_str());
    const bool applied = apply_cpudev_changes();
    if (!applied) {
        syslog(LOG_ERR, "Failed to update ownership/permissions under %s", g_cpudev_path.c_str());
        std::cerr << "(EE) Failed to update some ownership/permission entries under " << g_cpudev_path << "\n";
        closelog();
        return 3;
    }

    syslog(LOG_INFO, "Completed ownership/permission updates for user %s", user.c_str());
    std::cout << "(II) Permissions and ownership updated successfully for user: " << user << "\n";
    closelog();
    return 0;
}