pm2: add crash-loop governor to all apps; lower srebot max_memory_restart to 12000M
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+21
-1
@@ -9,18 +9,33 @@ const DEPLOY_PATH = __dirname;
|
||||
// Both bots share one venv at BOTS/SHARED/.venv (built from SHARED/requirements.txt).
|
||||
const PY_INTERPRETER = `${DEPLOY_PATH}/../SHARED/.venv/bin/python`;
|
||||
|
||||
// Shared crash-loop governor. Without this, `autorestart` relaunches a process
|
||||
// that dies on startup forever (every restart_delay). Several apps here share
|
||||
// SHARED/.env + SHARED/.venv + the STORAGE volume, so one bad shared config can
|
||||
// make them crash-loop at once and peg all 8 cores until the box is unreachable
|
||||
// (and `pm2 resurrect` then reproduces it on every boot). With this, PM2 gives
|
||||
// up after max_restarts attempts that each fail to stay up for min_uptime ms,
|
||||
// marking the app `errored` instead of hammering the CPU. exp_backoff grows the
|
||||
// delay between attempts (supersedes restart_delay during a crash loop).
|
||||
/**
 * Crash-loop limits shared by the PM2 app entries in `module.exports`.
 *
 * Each app pulls these in with `...RESTART_POLICY`; because the spread comes
 * right after `name`, any of these keys an entry sets later would override
 * the shared value.
 */
const RESTART_POLICY = {
  // Restart attempts PM2 makes before giving up and marking the app `errored`.
  max_restarts: 10,
  // A start only counts as failed if the process dies before this many ms.
  min_uptime: 10000,
  // Starting value for the growing delay between restart attempts
  // (milliseconds per the PM2 docs).
  exp_backoff_restart_delay: 200,
};
|
||||
|
||||
module.exports = {
|
||||
apps: [
|
||||
// Discord Bot
|
||||
{
|
||||
name: 'srebot',
|
||||
...RESTART_POLICY,
|
||||
script: 'start_bot.py',
|
||||
interpreter: PY_INTERPRETER,
|
||||
cwd: DEPLOY_PATH,
|
||||
instances: 1,
|
||||
autorestart: true,
|
||||
watch: false,
|
||||
max_memory_restart: '16000M',
|
||||
max_memory_restart: '12000M',
|
||||
log_file: './logs/bot_combined.log',
|
||||
out_file: './logs/bot_out.log',
|
||||
error_file: './logs/bot_error.log',
|
||||
@@ -33,6 +48,7 @@ module.exports = {
|
||||
// API Server (reads SREBOT_API_PORT from .env)
|
||||
{
|
||||
name: 'srebot-api',
|
||||
...RESTART_POLICY,
|
||||
script: 'server.js',
|
||||
interpreter: 'node',
|
||||
node_args: '--max-old-space-size=6144',
|
||||
@@ -54,6 +70,7 @@ module.exports = {
|
||||
// Reads TSS_API_HOST/PORT from .env (default 127.0.0.1:6100).
|
||||
{
|
||||
name: 'tssbot-api',
|
||||
...RESTART_POLICY,
|
||||
script: PY_INTERPRETER,
|
||||
args: '-m web.main',
|
||||
interpreter: 'none',
|
||||
@@ -79,6 +96,7 @@ module.exports = {
|
||||
// Reads SREBOT_EXTERNAL_HOST/PORT/UPSTREAM_URL + STORAGE_VOL_PATH from .env.
|
||||
{
|
||||
name: 'relay-gateway',
|
||||
...RESTART_POLICY,
|
||||
script: PY_INTERPRETER,
|
||||
args: '-m relay_gateway.gateway',
|
||||
interpreter: 'none',
|
||||
@@ -100,6 +118,7 @@ module.exports = {
|
||||
// Reads SREBOT_WEBHOOK_PORT from .env.
|
||||
{
|
||||
name: 'srebot-webhook',
|
||||
...RESTART_POLICY,
|
||||
script: 'github_webhook_updater.py',
|
||||
interpreter: PY_INTERPRETER,
|
||||
cwd: DEPLOY_PATH,
|
||||
@@ -119,6 +138,7 @@ module.exports = {
|
||||
// Website (reads SREBOT_WEB_PORT from .env)
|
||||
{
|
||||
name: 'srebot-web',
|
||||
...RESTART_POLICY,
|
||||
script: 'server.js',
|
||||
cwd: `${DEPLOY_PATH}/web`,
|
||||
instances: 3,
|
||||
|
||||
Reference in New Issue
Block a user