pm2: add crash-loop governor to all apps; lower srebot max_memory_restart to 12000M

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
deploy
2026-06-29 12:58:56 +00:00
parent a58a4e5c42
commit 010e356dc8
+21 -1
View File
@@ -9,18 +9,33 @@ const DEPLOY_PATH = __dirname;
// Both bots share one venv at BOTS/SHARED/.venv (built from SHARED/requirements.txt). // Both bots share one venv at BOTS/SHARED/.venv (built from SHARED/requirements.txt).
const PY_INTERPRETER = `${DEPLOY_PATH}/../SHARED/.venv/bin/python`; const PY_INTERPRETER = `${DEPLOY_PATH}/../SHARED/.venv/bin/python`;
// Shared crash-loop governor. Without this, `autorestart` relaunches a process
// that dies on startup forever (every restart_delay). Several apps here share
// SHARED/.env + SHARED/.venv + the STORAGE volume, so one bad shared config can
// make them crash-loop at once and peg all 8 cores until the box is unreachable
// (and `pm2 resurrect` then reproduces it on every boot). With this, PM2 gives
// up after max_restarts attempts that each fail to stay up min_uptime ms,
// marking the app `errored` instead of hammering the CPU.
// exp_backoff_restart_delay grows the delay between attempts (superseding
// restart_delay during a crash loop).
const RESTART_POLICY = {
  // Number of failed start attempts tolerated before PM2 stops retrying.
  max_restarts: 10,
  // An attempt counts as failed if the process stays up for less than this (ms).
  min_uptime: 10 * 1000,
  // Starting value for the back-off delay that grows between restart attempts.
  exp_backoff_restart_delay: 200,
};
module.exports = { module.exports = {
apps: [ apps: [
// Discord Bot // Discord Bot
{ {
name: 'srebot', name: 'srebot',
...RESTART_POLICY,
script: 'start_bot.py', script: 'start_bot.py',
interpreter: PY_INTERPRETER, interpreter: PY_INTERPRETER,
cwd: DEPLOY_PATH, cwd: DEPLOY_PATH,
instances: 1, instances: 1,
autorestart: true, autorestart: true,
watch: false, watch: false,
max_memory_restart: '16000M', max_memory_restart: '12000M',
log_file: './logs/bot_combined.log', log_file: './logs/bot_combined.log',
out_file: './logs/bot_out.log', out_file: './logs/bot_out.log',
error_file: './logs/bot_error.log', error_file: './logs/bot_error.log',
@@ -33,6 +48,7 @@ module.exports = {
// API Server (reads SREBOT_API_PORT from .env) // API Server (reads SREBOT_API_PORT from .env)
{ {
name: 'srebot-api', name: 'srebot-api',
...RESTART_POLICY,
script: 'server.js', script: 'server.js',
interpreter: 'node', interpreter: 'node',
node_args: '--max-old-space-size=6144', node_args: '--max-old-space-size=6144',
@@ -54,6 +70,7 @@ module.exports = {
// Reads TSS_API_HOST/PORT from .env (default 127.0.0.1:6100). // Reads TSS_API_HOST/PORT from .env (default 127.0.0.1:6100).
{ {
name: 'tssbot-api', name: 'tssbot-api',
...RESTART_POLICY,
script: PY_INTERPRETER, script: PY_INTERPRETER,
args: '-m web.main', args: '-m web.main',
interpreter: 'none', interpreter: 'none',
@@ -79,6 +96,7 @@ module.exports = {
// Reads SREBOT_EXTERNAL_HOST/PORT/UPSTREAM_URL + STORAGE_VOL_PATH from .env. // Reads SREBOT_EXTERNAL_HOST/PORT/UPSTREAM_URL + STORAGE_VOL_PATH from .env.
{ {
name: 'relay-gateway', name: 'relay-gateway',
...RESTART_POLICY,
script: PY_INTERPRETER, script: PY_INTERPRETER,
args: '-m relay_gateway.gateway', args: '-m relay_gateway.gateway',
interpreter: 'none', interpreter: 'none',
@@ -100,6 +118,7 @@ module.exports = {
// Reads SREBOT_WEBHOOK_PORT from .env. // Reads SREBOT_WEBHOOK_PORT from .env.
{ {
name: 'srebot-webhook', name: 'srebot-webhook',
...RESTART_POLICY,
script: 'github_webhook_updater.py', script: 'github_webhook_updater.py',
interpreter: PY_INTERPRETER, interpreter: PY_INTERPRETER,
cwd: DEPLOY_PATH, cwd: DEPLOY_PATH,
@@ -119,6 +138,7 @@ module.exports = {
// Website (reads SREBOT_WEB_PORT from .env) // Website (reads SREBOT_WEB_PORT from .env)
{ {
name: 'srebot-web', name: 'srebot-web',
...RESTART_POLICY,
script: 'server.js', script: 'server.js',
cwd: `${DEPLOY_PATH}/web`, cwd: `${DEPLOY_PATH}/web`,
instances: 3, instances: 3,