From 341dae1913cdbeb9e3bfc81031026976a51d798b Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 1 Jul 2026 22:58:15 +0000 Subject: [PATCH] feat: replace PM2 with systemd --user services for production Runs tssbot-web, tssbot-webhook, and tssbot-backend as systemd --user units instead of PM2 processes. tssbot-web moves from a 2-worker PM2 cluster to a single instance, so deploys now restart it directly instead of doing a zero-downtime cluster reload. webhook.cjs now shells out to `systemctl --user restart` instead of `pm2 reload`, and PM2_RESTART_TARGETS/WEBHOOK_PM2_NAME are renamed to RESTART_TARGETS/WEBHOOK_SERVICE_NAME. scripts/install-systemd-services.sh symlinks the new unit files into ~/.config/systemd/user and enables them. Co-Authored-By: Claude Sonnet 5 --- README.md | 90 +++++++++++++--------- backend/README.md | 2 +- ecosystem.config.cjs | 114 ---------------------------- example.env | 4 +- scripts/install-systemd-services.sh | 22 ++++++ server.cjs | 8 +- systemd/tssbot-backend.service | 19 +++++ systemd/tssbot-web.service | 19 +++++ systemd/tssbot-webhook.service | 21 +++++ webhook.cjs | 78 ++++++++----------- 10 files changed, 172 insertions(+), 205 deletions(-) delete mode 100644 ecosystem.config.cjs create mode 100755 scripts/install-systemd-services.sh create mode 100644 systemd/tssbot-backend.service create mode 100644 systemd/tssbot-web.service create mode 100644 systemd/tssbot-webhook.service diff --git a/README.md b/README.md index ece99e2..1d41ef5 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The repo is split into: - `frontend/` - React + Vite + Tailwind v4 web shell - `backend/` - backend API service scaffold, ready for database-backed routes -- root process files - production frontend server, deploy webhook, PM2 config, and shared repo scripts +- root process files - production frontend server, deploy webhook, systemd unit files, and shared repo scripts Routes: @@ -42,7 +42,7 @@ The backend listens on by default and reads the SQLite 
databases configured by `TSS_BATTLES_DB` and `TSS_TEAMS_DB`. Keep it bound to `127.0.0.1` in production and let `tssbot-web` proxy public API requests. -## Production with PM2 +## Production with systemd On a fresh headless Ubuntu server, install the native build tools Rust crates need before the first backend build: @@ -58,13 +58,34 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh npm install npm run build npm run build:backend -pm2 start ecosystem.config.cjs +scripts/install-systemd-services.sh ``` -The production server runs on http://127.0.0.1:3010. PM2 starts the web app in -cluster mode with two workers by default, waits for each worker to signal that it -is ready, and then reloads workers one at a time during deploys. Override the -worker count with `WEB_INSTANCES`. +`scripts/install-systemd-services.sh` symlinks the unit files under `systemd/` +into `~/.config/systemd/user/`, then runs `systemctl --user daemon-reload` and +`enable --now` for all three services. These are user-level (`systemctl --user`) +units running as the deploy user, not system-wide units — no root needed to +manage them day to day. Because user services normally stop when the user logs +out, enable lingering once so they keep running: + +```sh +sudo loginctl enable-linger "$USER" +``` + +The production server runs on http://127.0.0.1:3010, `tssbot-backend` on +http://127.0.0.1:6000 (see `BACKEND_PORT`), and the webhook listener on +`WEBHOOK_PORT`. Each runs as a single instance — a deploy restart is a plain +`systemctl --user restart`, so expect a brief (roughly 1-2 second) connection +drop while `tssbot-web` restarts, rather than PM2-style zero-downtime cluster +reloads.
+ +Useful commands: + +```sh +systemctl --user status tssbot-web tssbot-webhook tssbot-backend +journalctl --user -u tssbot-web -f +systemctl --user restart tssbot-web +``` The server serves `/health` locally and only proxies the API routes used by the app: @@ -94,18 +115,15 @@ ship `X-Content-Type-Options`, `X-Frame-Options: DENY`, `Referrer-Policy`, HSTS (over HTTPS), and HTML responses include a Content Security Policy that allows only Cloudflare Turnstile and the CARTO basemap tiles. -Override the API target before starting PM2 if needed: +Override the API target by setting it in `.env` and restarting: ```sh -API_UPSTREAM=http://127.0.0.1:8080 pm2 start ecosystem.config.cjs +echo 'API_UPSTREAM=http://127.0.0.1:8080' >> .env +systemctl --user restart tssbot-web ``` Set `PUBLIC_ORIGIN` to the public site origin in production, especially behind a -reverse proxy: - -```sh -PUBLIC_ORIGIN=https://your-domain.example pm2 start ecosystem.config.cjs -``` +reverse proxy (same `.env` + restart pattern as above). Optional API protection tuning: @@ -127,14 +145,12 @@ SITE_SESSION_TTL_SECONDS=43200 Successful Turnstile verification sets signed, HttpOnly Turnstile and site-session cookies. `/api/*` and `/data/*` requests must present those cookies plus same-origin browser request metadata, so the data is served to verified active -site sessions instead of as an open public API. All PM2 web instances must share -the same `SITE_SESSION_SECRET`. +site sessions instead of as an open public API. -On startup, the web server preloads the critical public snapshots before -signalling PM2 `ready`: team leaderboard, player leaderboard, home teams, and -recent games. `/health` includes a `public_data` block with the latest preload -status. A same-origin `POST /api/cache/prewarm` refreshes those snapshots on -demand. +On startup, the web server preloads the critical public snapshots: team +leaderboard, player leaderboard, home teams, and recent games. 
`/health` +includes a `public_data` block with the latest preload status. A same-origin +`POST /api/cache/prewarm` refreshes those snapshots on demand. ## Reverse proxy / Cloudflare @@ -205,16 +221,15 @@ The webhook process listens on port `3011` at `/github`. Configure GitHub to sen push events there. A webhook secret is required — without `GITHUB_WEBHOOK_SECRET`, the webhook -rejects every request: +rejects every request. Put it in `.env` in the project root (recommended over +inlining the secret in a shell command, which writes it to shell history), then +restart: ```sh -GITHUB_WEBHOOK_SECRET=your-secret pm2 start ecosystem.config.cjs +echo 'GITHUB_WEBHOOK_SECRET=your-secret' >> .env +systemctl --user restart tssbot-webhook ``` -On PowerShell, set `$env:GITHUB_WEBHOOK_SECRET = "your-secret"` before starting -PM2, or put the value in a `.env` file in the project root (recommended over -inlining the secret in a shell command, which writes it to shell history). - The webhook only deploys pushes whose `ref` is in `GITHUB_WEBHOOK_REFS` (default `refs/heads/main`). Optionally pin the repository: @@ -239,21 +254,22 @@ npm ci --include=dev --include=optional npm run build -- --outDir ../dist-next cargo build --manifest-path backend/Cargo.toml --release # the webhook promotes dist-next to dist after carrying over old hashed assets -pm2 reload tssbot-web --update-env -pm2 reload tssbot-backend --update-env +systemctl --user restart tssbot-web tssbot-backend ``` -Only processes listed in `PM2_RESTART_TARGETS` are reloaded. The default is -`tssbot-web,tssbot-backend`, so unrelated PM2 processes are left alone. The web server handles -`SIGINT` and `SIGTERM` by closing its listener and SQLite handles before exit, -which lets PM2 finish reloads without dropping active requests. The webhook -exits after 24 hours so PM2 restarts it cleanly. +Only services listed in `RESTART_TARGETS` are restarted. 
The default is +`tssbot-web,tssbot-backend`, so unrelated systemd units are left alone. The web +server handles `SIGINT` and `SIGTERM` by closing its listener and SQLite +handles before exit, giving systemd a clean shutdown within `TimeoutStopSec` +instead of a hard kill. The webhook exits cleanly every 24 hours; its unit uses +`Restart=always` so systemd relaunches it right away. -When webhook code changes are deployed, restart the webhook process once so PM2 -loads the updated listener: +When webhook code changes are deployed, the webhook restarts itself once +(delayed so its own deploy response/notifications land first) so it loads the +updated listener: ```sh -pm2 reload tssbot-webhook --update-env +systemctl --user restart tssbot-webhook ``` The webhook listener reads `.env` on startup. To send Discord notifications for diff --git a/backend/README.md b/backend/README.md index 5ba5a19..a142e4b 100644 --- a/backend/README.md +++ b/backend/README.md @@ -13,7 +13,7 @@ before falling back to the current working directory. - `BACKEND_HOST` bind host, default `127.0.0.1` - `BACKEND_ALLOWED_ORIGINS` comma-separated browser origins allowed by CORS -Both paths can be absolute or relative to the repo root when run through the root scripts/PM2. +Both paths can be absolute or relative to the repo root when run through the root scripts/systemd units. 
## Vehicle translation + icons diff --git a/ecosystem.config.cjs b/ecosystem.config.cjs deleted file mode 100644 index 873e303..0000000 --- a/ecosystem.config.cjs +++ /dev/null @@ -1,114 +0,0 @@ -const fs = require('node:fs') -const path = require('node:path') - -function loadEnvFile() { - const envPath = path.join(__dirname, '.env') - if (!fs.existsSync(envPath)) return - - const lines = fs.readFileSync(envPath, 'utf8').split(/\r?\n/) - for (const line of lines) { - const trimmed = line.trim() - if (!trimmed || trimmed.startsWith('#')) continue - - const separatorIndex = trimmed.indexOf('=') - if (separatorIndex === -1) continue - - const key = trimmed.slice(0, separatorIndex).trim() - let value = trimmed.slice(separatorIndex + 1).trim() - if ( - (value.startsWith('"') && value.endsWith('"')) || - (value.startsWith("'") && value.endsWith("'")) - ) { - value = value.slice(1, -1) - } - - if (key && (!process.env[key] || process.env[key] === '')) { - process.env[key] = value - } - } -} - -loadEnvFile() - -// Crash-loop governor: after max_restarts attempts that each fail to stay up -// min_uptime ms, PM2 marks the app `errored` and stops relaunching it, instead -// of restarting forever and pegging the CPU. 
-const RESTART_POLICY = { - max_restarts: 10, - min_uptime: 10000, - exp_backoff_restart_delay: 200, -} - -module.exports = { - apps: [ - { - name: 'tssbot-web', - ...RESTART_POLICY, - script: 'server.cjs', - cwd: __dirname, - exec_mode: 'cluster', - instances: process.env.WEB_INSTANCES || 2, - wait_ready: true, - listen_timeout: 10000, - kill_timeout: 10000, - env: { - NODE_ENV: 'production', - PORT: process.env.PORT || 3010, - API_UPSTREAM: process.env.API_UPSTREAM || 'http://127.0.0.1:6000', - PUBLIC_ORIGIN: process.env.PUBLIC_ORIGIN || '', - UPTIME_STORAGE_DIR: process.env.UPTIME_STORAGE_DIR || '~/tsswebstorage', - UPTIME_DATABASE_FILE: process.env.UPTIME_DATABASE_FILE || 'uptime.sqlite', - UPTIME_SAMPLE_INTERVAL_MS: process.env.UPTIME_SAMPLE_INTERVAL_MS || 1800000, - UPTIME_HISTORY_LIMIT: process.env.UPTIME_HISTORY_LIMIT || 336, - API_CACHE_TTL_MS: process.env.API_CACHE_TTL_MS || 15000, - API_RATE_LIMIT_WINDOW_MS: process.env.API_RATE_LIMIT_WINDOW_MS || 60000, - API_RATE_LIMIT_MAX: process.env.API_RATE_LIMIT_MAX || 120, - TRUST_PROXY: process.env.TRUST_PROXY || 'cloudflare', - TRUSTED_UPSTREAM_IPS: process.env.TRUSTED_UPSTREAM_IPS || '127.0.0.1,::1,::ffff:127.0.0.1', - SITE_SESSION_SECRET: process.env.SITE_SESSION_SECRET || process.env.API_SESSION_SECRET || process.env.TURNSTILE_SECRET_KEY || '', - SITE_SESSION_TTL_SECONDS: process.env.SITE_SESSION_TTL_SECONDS || 43200, - TURNSTILE_SECRET_KEY: process.env.TURNSTILE_SECRET_KEY || '', - }, - }, - { - name: 'tssbot-webhook', - ...RESTART_POLICY, - script: 'webhook.cjs', - cwd: __dirname, - autorestart: true, - env: { - NODE_ENV: 'production', - WEBHOOK_PORT: process.env.WEBHOOK_PORT || 3011, - WEBHOOK_PM2_NAME: process.env.WEBHOOK_PM2_NAME || 'tssbot-webhook', - GITHUB_WEBHOOK_SECRET: process.env.GITHUB_WEBHOOK_SECRET || '', - GITHUB_WEBHOOK_REFS: process.env.GITHUB_WEBHOOK_REFS || 'refs/heads/main', - GITHUB_WEBHOOK_REPOSITORY: process.env.GITHUB_WEBHOOK_REPOSITORY || '', - PM2_RESTART_TARGETS: 
process.env.PM2_RESTART_TARGETS || 'tssbot-web,tssbot-backend', - DISCORD_WEBHOOK_URL: process.env.DISCORD_WEBHOOK_URL || '', - DISCORD_INCLUDE_PATCH: process.env.DISCORD_INCLUDE_PATCH || 'false', - }, - }, - { - name: 'tssbot-backend', - ...RESTART_POLICY, - script: process.platform === 'win32' - ? 'backend/target/release/tssbot-backend.exe' - : 'backend/target/release/tssbot-backend', - cwd: __dirname, - autorestart: true, - env: { - NODE_ENV: 'production', - BACKEND_PORT: process.env.BACKEND_PORT || 6000, - BACKEND_HOST: process.env.BACKEND_HOST || '127.0.0.1', - BACKEND_ALLOWED_ORIGINS: process.env.BACKEND_ALLOWED_ORIGINS || process.env.PUBLIC_ORIGIN || '', - TSS_BATTLES_DB: process.env.TSS_BATTLES_DB || 'tss_battles.db', - TSS_TEAMS_DB: process.env.TSS_TEAMS_DB || 'tss_teams.db', - // Vehicle name + icon caches (built by the bots in the shared STORAGE volume). - VEHICLE_TRANSLATIONS_JSON: process.env.VEHICLE_TRANSLATIONS_JSON - || '/mnt/HC_Volume_105581488/STORAGE/CACHE/vehicle_translations.json', - VEHICLE_DATA_CACHE_JSON: process.env.VEHICLE_DATA_CACHE_JSON - || '/mnt/HC_Volume_105581488/STORAGE/CACHE/vehicle_data_cache.json', - }, - }, - ], -} diff --git a/example.env b/example.env index 619deef..82fa210 100644 --- a/example.env +++ b/example.env @@ -55,7 +55,9 @@ GITHUB_WEBHOOK_SECRET=change-me GITHUB_WEBHOOK_REFS=refs/heads/main # Optional: refuse pushes whose repository.full_name does not match (e.g. "owner/repo"). GITHUB_WEBHOOK_REPOSITORY= -PM2_RESTART_TARGETS=tssbot-web,tssbot-backend +# Comma-separated systemd --user unit names (without .service) restarted after a +# successful deploy build. +RESTART_TARGETS=tssbot-web,tssbot-backend DISCORD_WEBHOOK_URL=https://discord.com/api/webhooks/... # Set to "true" only if the Discord channel is private. 
Default omits the patch preview DISCORD_INCLUDE_PATCH=true diff --git a/scripts/install-systemd-services.sh b/scripts/install-systemd-services.sh new file mode 100755 index 0000000..2db5bfa --- /dev/null +++ b/scripts/install-systemd-services.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Installs/updates the tssbot-web systemd --user units, enables them, and starts any that are not already running. +# Run this after cloning, and again any time a unit file under systemd/ changes; `enable --now` leaves running units untouched, so `systemctl --user restart` the changed unit afterwards. +set -euo pipefail + +repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +unit_dir="${HOME}/.config/systemd/user" +mkdir -p "${unit_dir}" + +for unit in tssbot-web tssbot-webhook tssbot-backend; do + ln -sf "${repo_dir}/systemd/${unit}.service" "${unit_dir}/${unit}.service" +done + +systemctl --user daemon-reload +systemctl --user enable --now tssbot-web.service tssbot-webhook.service tssbot-backend.service + +if [ "$(loginctl show-user "$(whoami)" -p Linger --value 2>/dev/null)" != "yes" ]; then + echo "Linger is not enabled for $(whoami) — user services will stop when you log out." + echo "Enable it with: sudo loginctl enable-linger $(whoami)" +fi + +systemctl --user status --no-pager tssbot-web.service tssbot-webhook.service tssbot-backend.service diff --git a/server.cjs b/server.cjs index 598fb68..af1b9f9 100644 --- a/server.cjs +++ b/server.cjs @@ -3510,10 +3510,10 @@ function shutdown() { // server.close() only fires its callback once every socket is gone, and idle // HTTP keep-alive sockets (held open by nginx/Cloudflare) never close on - // their own — so without this the worker hangs the full kill_timeout on every - // stop/reload, which is what wedges the PM2 cluster daemon. Close idle sockets - // immediately, let in-flight requests finish for a short grace period, then - // force the rest so shutdown completes well inside kill_timeout. + // their own — so without this the process hangs the full TimeoutStopSec on + // every stop/restart.
Close idle sockets immediately, let in-flight requests + // finish for a short grace period, then force the rest so shutdown completes + // well inside TimeoutStopSec. server.closeIdleConnections() setTimeout(() => server.closeAllConnections(), 3000).unref() diff --git a/systemd/tssbot-backend.service b/systemd/tssbot-backend.service new file mode 100644 index 0000000..0b06254 --- /dev/null +++ b/systemd/tssbot-backend.service @@ -0,0 +1,19 @@ +[Unit] +Description=tssbot backend API service +After=network-online.target +Wants=network-online.target +StartLimitIntervalSec=100 +StartLimitBurst=10 + +[Service] +Type=simple +WorkingDirectory=%h/tssbot.web +ExecStart=%h/tssbot.web/backend/target/release/tssbot-backend +Restart=on-failure +RestartSec=200ms +RestartSteps=10 +RestartMaxDelaySec=10s +TimeoutStopSec=10 + +[Install] +WantedBy=default.target diff --git a/systemd/tssbot-web.service b/systemd/tssbot-web.service new file mode 100644 index 0000000..675ea3a --- /dev/null +++ b/systemd/tssbot-web.service @@ -0,0 +1,19 @@ +[Unit] +Description=tssbot-web production server +After=network-online.target +Wants=network-online.target +StartLimitIntervalSec=100 +StartLimitBurst=10 + +[Service] +Type=simple +WorkingDirectory=%h/tssbot.web +ExecStart=/usr/bin/node server.cjs +Restart=on-failure +RestartSec=200ms +RestartSteps=10 +RestartMaxDelaySec=10s +TimeoutStopSec=10 + +[Install] +WantedBy=default.target diff --git a/systemd/tssbot-webhook.service b/systemd/tssbot-webhook.service new file mode 100644 index 0000000..2995173 --- /dev/null +++ b/systemd/tssbot-webhook.service @@ -0,0 +1,21 @@ +[Unit] +Description=tssbot-web GitHub deploy webhook +After=network-online.target +Wants=network-online.target +StartLimitIntervalSec=100 +StartLimitBurst=10 + +[Service] +Type=simple +WorkingDirectory=%h/tssbot.web +ExecStart=/usr/bin/node webhook.cjs +# Always (not on-failure): the process deliberately exits 0 every 24h so +# systemd relaunches it with a clean listener. 
+Restart=always +RestartSec=200ms +RestartSteps=10 +RestartMaxDelaySec=10s +TimeoutStopSec=10 + +[Install] +WantedBy=default.target diff --git a/webhook.cjs b/webhook.cjs index 6ae6ce3..2c7c184 100644 --- a/webhook.cjs +++ b/webhook.cjs @@ -40,14 +40,14 @@ const PORT = Number(process.env.WEBHOOK_PORT || 3011) const SECRET = process.env.GITHUB_WEBHOOK_SECRET || '' const DISCORD_WEBHOOK_URL = process.env.DISCORD_WEBHOOK_URL || '' const DISCORD_INCLUDE_PATCH = /^(1|true|yes)$/i.test(String(process.env.DISCORD_INCLUDE_PATCH || '')) -const RESTART_TARGETS = (process.env.PM2_RESTART_TARGETS || 'tssbot-web,tssbot-backend') +const RESTART_TARGETS = (process.env.RESTART_TARGETS || 'tssbot-web,tssbot-backend') .split(',') .map((target) => target.trim()) .filter((target) => /^[A-Za-z0-9_.:-]{1,80}$/.test(target)) .filter(Boolean) -// This webhook's own PM2 process name — never reload it during its own deploy. -const SELF_PM2_NAME = process.env.WEBHOOK_PM2_NAME || 'tssbot-webhook' +// This webhook's own systemd unit name — never restart it inline during its own deploy. +const SELF_SERVICE_NAME = process.env.WEBHOOK_SERVICE_NAME || 'tssbot-webhook' const DIST_DIR = path.join(__dirname, 'dist') const NEXT_DIST_DIR = path.join(__dirname, 'dist-next') const PREVIOUS_DIST_DIR = path.join(__dirname, 'dist-previous') @@ -57,7 +57,7 @@ const WEBHOOK_HEADERS_TIMEOUT_MS = Number(process.env.WEBHOOK_HEADERS_TIMEOUT_MS // No deploy step may hang forever. A stalled `npm ci` (a native postinstall that // never returns) would otherwise block for hours with node_modules already // deleted — which is exactly what took the site down. These cap each step so a -// hang fails fast and aborts the deploy before any pm2 reload. +// hang fails fast and aborts the deploy before any systemctl restart. 
const DEPLOY_STEP_TIMEOUT_MS = Number(process.env.DEPLOY_STEP_TIMEOUT_MS || 15 * 60 * 1000) const DEPLOY_INSTALL_TIMEOUT_MS = Number(process.env.DEPLOY_INSTALL_TIMEOUT_MS || 8 * 60 * 1000) const ALLOWED_REFS = new Set( @@ -200,7 +200,6 @@ function commandFor(command) { } if (process.platform !== 'win32') return command if (command === 'npm') return 'npm.cmd' - if (command === 'pm2') return 'pm2.cmd' return command } @@ -218,7 +217,7 @@ function restartTargetsInclude(target) { } function pushTouchesWebhookRuntime(push) { - const runtimeFiles = new Set(['webhook.cjs', 'ecosystem.config.cjs']) + const runtimeFiles = new Set(['webhook.cjs', 'systemd/tssbot-webhook.service']) const commits = Array.isArray(push?.commits) ? push.commits : [] return commits.some((commit) => { const changed = [ @@ -230,28 +229,18 @@ function pushTouchesWebhookRuntime(push) { }) } -function scheduleSelfReload(reason) { - let resolvedCommand - try { - resolvedCommand = commandFor('pm2') - } catch (error) { - console.error(`could not schedule ${SELF_PM2_NAME} reload:`, error.message) - return - } - - console.log(`scheduling ${SELF_PM2_NAME} reload: ${reason}`) +function scheduleSelfRestart(reason) { + console.log(`scheduling ${SELF_SERVICE_NAME} restart: ${reason}`) + // Delayed + detached: `systemctl --user restart` sends SIGTERM to this very + // process once it starts, so fire it after this tick unrefs and let the + // deploy's response/notifications land first. 
setTimeout(() => { - const child = spawn( - resolvedCommand, - ['reload', 'ecosystem.config.cjs', '--only', SELF_PM2_NAME, '--update-env'], - { - cwd: __dirname, - env: process.env, - detached: true, - stdio: 'ignore', - shell: process.platform === 'win32', - }, - ) + const child = spawn('systemctl', ['--user', 'restart', `${SELF_SERVICE_NAME}.service`], { + cwd: __dirname, + env: process.env, + detached: true, + stdio: 'ignore', + }) child.unref() }, 1000).unref() } @@ -275,7 +264,7 @@ function run(command, args, options = {}) { stdio: 'inherit', }) - // Kill the step if it hangs so deploy() aborts before any pm2 reload instead + // Kill the step if it hangs so deploy() aborts before any systemctl restart instead // of wedging here indefinitely (see DEPLOY_STEP_TIMEOUT_MS above). const timeoutMs = Number(options.timeoutMs) > 0 ? Number(options.timeoutMs) : DEPLOY_STEP_TIMEOUT_MS let timedOut = false @@ -441,7 +430,7 @@ async function ensureBuildDependencies(previousHead) { } // Hard gate: better-sqlite3 must actually load after the install, or abort the - // deploy here — before promoteBuiltDist()/pm2 reload — so a broken native build + // deploy here — before promoteBuiltDist()/systemctl restart — so a broken native build // can never be promoted to the running workers (which still hold a good binary). if (!(await betterSqliteLoads())) { throw new Error( @@ -708,27 +697,20 @@ async function deploy(push) { promoteBuiltDist() syncVehicleIcons() - // Reload via the ecosystem file (not by bare name) with --only so each deploy - // re-reads the committed env blocks (e.g. VEHICLE_* paths). `pm2 reload - // --update-env` would only merge the CLI's process.env and ignore the file. - // Exclude this webhook process from the awaited reload: killing the process - // running this deploy mid-command can interrupt the remaining reloads. 
- const reloadTargets = RESTART_TARGETS.filter((t) => t !== SELF_PM2_NAME) - if (reloadTargets.length) { - await run('pm2', [ - 'reload', - 'ecosystem.config.cjs', - '--only', - reloadTargets.join(','), - '--update-env', - ]) + // Each restarted service re-reads .env itself on startup, so a plain + // `systemctl restart` always picks up the committed env changes. + // Exclude this webhook process from the awaited restart: killing the process + // running this deploy mid-command can interrupt the remaining restarts. + const restartTargets = RESTART_TARGETS.filter((t) => t !== SELF_SERVICE_NAME) + if (restartTargets.length) { + await run('systemctl', ['--user', 'restart', ...restartTargets.map((t) => `${t}.service`)]) } await notifyDeployCompleted(push, diff) - if (restartTargetsInclude(SELF_PM2_NAME) || pushTouchesWebhookRuntime(push)) { - scheduleSelfReload( - restartTargetsInclude(SELF_PM2_NAME) - ? `${SELF_PM2_NAME} is listed in PM2_RESTART_TARGETS` + if (restartTargetsInclude(SELF_SERVICE_NAME) || pushTouchesWebhookRuntime(push)) { + scheduleSelfRestart( + restartTargetsInclude(SELF_SERVICE_NAME) + ? `${SELF_SERVICE_NAME} is listed in RESTART_TARGETS` : 'webhook runtime files changed', ) } @@ -860,6 +842,6 @@ webhookServer.listen(PORT, '0.0.0.0', () => { }) setTimeout(() => { - console.log('24 hour webhook refresh reached; exiting for PM2 restart') + console.log('24 hour webhook refresh reached; exiting for systemd restart') process.exit(0) }, RESTART_AFTER_MS).unref()