diff --git a/.gitignore b/.gitignore index 6d97866..99df0d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ node_modules dist +dist-next +dist-previous .env .env.local .DS_Store @@ -7,4 +9,4 @@ npm-debug.log* vite-dev*.log server-local*.log .local-storage/ -.claude/ \ No newline at end of file +.claude/ diff --git a/README.md b/README.md index 7783257..f600e04 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,12 @@ npm run build pm2 start ecosystem.config.cjs ``` -The production server runs on http://localhost:3010. It serves `/health` +The production server runs on http://localhost:3010. PM2 starts the web app in +cluster mode with two workers by default, waits for each worker to signal that it +is ready, and then reloads workers one at a time during deploys. Override the +worker count with `WEB_INSTANCES`. + +The server serves `/health` locally and only proxies the API routes used by the app: - `GET /api/tss/leaderboard/teams?limit=1..100` @@ -171,14 +176,17 @@ The default deploy flow is: ```sh git pull --ff-only -npm install --production=false --include=dev --include=optional -npm run build +npm ci --include=dev --include=optional +npm run build -- --outDir dist-next +# the webhook promotes dist-next to dist after carrying over old hashed assets pm2 reload tssbot-web --update-env ``` Only processes listed in `PM2_RESTART_TARGETS` are reloaded. The default is -`tssbot-web`, so unrelated PM2 processes are left alone. The webhook exits after -24 hours so PM2 restarts it cleanly. +`tssbot-web`, so unrelated PM2 processes are left alone. The web server handles +`SIGINT` and `SIGTERM` by closing its listener and SQLite handles before exit, +which lets PM2 finish reloads without dropping active requests. The webhook +exits after 24 hours so PM2 restarts it cleanly. 
When webhook code changes are deployed, restart the webhook process once so PM2 loads the updated listener: diff --git a/ecosystem.config.cjs b/ecosystem.config.cjs index dd2b57c..a390fad 100644 --- a/ecosystem.config.cjs +++ b/ecosystem.config.cjs @@ -4,6 +4,11 @@ module.exports = { name: 'tssbot-web', script: 'server.cjs', cwd: __dirname, + exec_mode: 'cluster', + instances: process.env.WEB_INSTANCES || 2, + wait_ready: true, + listen_timeout: 10000, + kill_timeout: 10000, env: { NODE_ENV: 'production', PORT: process.env.PORT || 3010, diff --git a/public/light_fury_match.jpg b/public/light_fury_match.jpg deleted file mode 100755 index bb918e6..0000000 Binary files a/public/light_fury_match.jpg and /dev/null differ diff --git a/server.cjs b/server.cjs index bc882c2..7ab12d3 100644 --- a/server.cjs +++ b/server.cjs @@ -59,6 +59,7 @@ const MAX_TEAM_NAME_LENGTH = 80 const MAX_CACHE_ENTRIES = 200 const MAX_RATE_LIMIT_KEYS = 1000 const MAX_ANALYTICS_BODY_BYTES = 16 * 1024 +const RUN_BACKGROUND_JOBS = !process.env.NODE_APP_INSTANCE || process.env.NODE_APP_INSTANCE === '0' const TRUST_PROXY = (() => { const raw = String(process.env.TRUST_PROXY ?? 
'cloudflare').trim().toLowerCase() @@ -241,6 +242,7 @@ function ensureAnalyticsDb() { analyticsDb = new Database(path.join(storageDir, ANALYTICS_DATABASE_FILE)) analyticsDb.pragma('journal_mode = WAL') + analyticsDb.pragma('busy_timeout = 5000') analyticsDb.exec(` create table if not exists viewer_events ( id integer primary key autoincrement, @@ -331,6 +333,7 @@ function ensureUptimeDb() { uptimeDb = new Database(path.join(storageDir, UPTIME_DATABASE_FILE)) uptimeDb.pragma('journal_mode = WAL') + uptimeDb.pragma('busy_timeout = 5000') uptimeDb.exec(` create table if not exists uptime_snapshots ( id integer primary key autoincrement, @@ -433,12 +436,14 @@ async function uptimeHistory() { } } +let uptimeSamplerTimer = null + function startUptimeSampler() { takeUptimeSnapshot().catch((error) => { console.error('Initial uptime snapshot failed:', error) }) - setInterval(() => { + uptimeSamplerTimer = setInterval(() => { takeUptimeSnapshot().catch((error) => { console.error('Uptime snapshot failed:', error) }) @@ -926,15 +931,17 @@ function purgeOldAnalytics(db) { const eventCutoff = new Date(Date.now() - ANALYTICS_RETENTION_DAYS * 24 * 60 * 60 * 1000).toISOString() const activeCutoff = new Date(Date.now() - ANALYTICS_ACTIVE_WINDOW_SECONDS * 3 * 1000).toISOString() - db.prepare(` - delete from viewer_events - where occurred_at < ? - `).run(eventCutoff) + db.transaction(() => { + db.prepare(` + delete from viewer_events + where occurred_at < ? + `).run(eventCutoff) - db.prepare(` - delete from active_viewers - where last_seen_at < ? - `).run(activeCutoff) + db.prepare(` + delete from active_viewers + where last_seen_at < ? 
+ `).run(activeCutoff) + })() } function recordViewerEvent(req, payload) { @@ -978,44 +985,48 @@ function recordViewerEvent(req, payload) { } const now = new Date().toISOString() - db.prepare(` - insert into viewer_events - (occurred_at, visitor_id, session_id, ip_hash, event_type, page_path, page_title, - referrer, user_agent, browser, os, device, screen, language, timezone, - country, region, city, latitude, longitude, consent, metadata) - values - (@occurred_at, @visitor_id, @session_id, @ip_hash, @event_type, @page_path, @page_title, - @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone, - @country, @region, @city, @latitude, @longitude, @consent, @metadata) - `).run({ ...event, occurred_at: now }) + const writeViewerEvent = db.transaction(() => { + db.prepare(` + insert into viewer_events + (occurred_at, visitor_id, session_id, ip_hash, event_type, page_path, page_title, + referrer, user_agent, browser, os, device, screen, language, timezone, + country, region, city, latitude, longitude, consent, metadata) + values + (@occurred_at, @visitor_id, @session_id, @ip_hash, @event_type, @page_path, @page_title, + @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone, + @country, @region, @city, @latitude, @longitude, @consent, @metadata) + `).run({ ...event, occurred_at: now }) - db.prepare(` - insert into active_viewers - (session_id, visitor_id, ip_hash, first_seen_at, last_seen_at, page_path, page_title, - referrer, user_agent, browser, os, device, screen, language, timezone, - country, region, city, latitude, longitude) - values - (@session_id, @visitor_id, @ip_hash, @now, @now, @page_path, @page_title, - @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone, - @country, @region, @city, @latitude, @longitude) - on conflict(session_id) do update set - last_seen_at = excluded.last_seen_at, - page_path = excluded.page_path, - page_title = excluded.page_title, - referrer = excluded.referrer, - 
user_agent = excluded.user_agent, - browser = excluded.browser, - os = excluded.os, - device = excluded.device, - screen = excluded.screen, - language = excluded.language, - timezone = excluded.timezone, - country = excluded.country, - region = excluded.region, - city = excluded.city, - latitude = excluded.latitude, - longitude = excluded.longitude - `).run({ ...event, now }) + db.prepare(` + insert into active_viewers + (session_id, visitor_id, ip_hash, first_seen_at, last_seen_at, page_path, page_title, + referrer, user_agent, browser, os, device, screen, language, timezone, + country, region, city, latitude, longitude) + values + (@session_id, @visitor_id, @ip_hash, @now, @now, @page_path, @page_title, + @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone, + @country, @region, @city, @latitude, @longitude) + on conflict(session_id) do update set + last_seen_at = excluded.last_seen_at, + page_path = excluded.page_path, + page_title = excluded.page_title, + referrer = excluded.referrer, + user_agent = excluded.user_agent, + browser = excluded.browser, + os = excluded.os, + device = excluded.device, + screen = excluded.screen, + language = excluded.language, + timezone = excluded.timezone, + country = excluded.country, + region = excluded.region, + city = excluded.city, + latitude = excluded.latitude, + longitude = excluded.longitude + `).run({ ...event, now }) + }) + + writeViewerEvent() } function deleteViewerData(payload) { @@ -1790,11 +1801,49 @@ const server = http.createServer((req, res) => { server.listen(PORT, '0.0.0.0', () => { console.log(`tssbot-web serving http://localhost:${PORT}`) console.log(`proxying API requests to ${API_UPSTREAM}`) - console.log(`sampling uptime every ${Math.round(UPTIME_SAMPLE_INTERVAL_MS / 60000)} minutes`) + if (RUN_BACKGROUND_JOBS) { + console.log(`sampling uptime every ${Math.round(UPTIME_SAMPLE_INTERVAL_MS / 60000)} minutes`) + } else { + console.log('uptime sampler disabled in this worker') + } 
console.log(`storing uptime snapshots in ${path.join(uptimeStoragePath(), UPTIME_DATABASE_FILE)}`) console.log(`storing viewer analytics in ${path.join(uptimeStoragePath(), ANALYTICS_DATABASE_FILE)}`) if (!TURNSTILE_SECRET_KEY) { console.warn('TURNSTILE_SECRET_KEY is not set — Turnstile verification is disabled and gated endpoints will accept any request') } - startUptimeSampler() + if (RUN_BACKGROUND_JOBS) startUptimeSampler() + process.send?.('ready') }) + +let shuttingDown = false + +function closeDatabase(db, name) { + if (!db) return + + try { + db.close() + } catch (error) { + console.error(`Failed to close ${name} database:`, error) + } +} + +function shutdown() { + if (shuttingDown) return + shuttingDown = true + + if (uptimeSamplerTimer) clearInterval(uptimeSamplerTimer) + + server.close(() => { + closeDatabase(uptimeDb, 'uptime') + closeDatabase(analyticsDb, 'analytics') + process.exit(0) + }) + + setTimeout(() => { + console.error('Graceful shutdown timed out') + process.exit(1) + }, 10000).unref() +} + +process.on('SIGINT', shutdown) +process.on('SIGTERM', shutdown) diff --git a/webhook.cjs b/webhook.cjs index 8971bcb..2f6b4c5 100644 --- a/webhook.cjs +++ b/webhook.cjs @@ -44,6 +44,9 @@ const RESTART_TARGETS = (process.env.PM2_RESTART_TARGETS || 'tssbot-web') .split(',') .map((target) => target.trim()) .filter(Boolean) +const DIST_DIR = path.join(__dirname, 'dist') +const NEXT_DIST_DIR = path.join(__dirname, 'dist-next') +const PREVIOUS_DIST_DIR = path.join(__dirname, 'dist-previous') const ALLOWED_REFS = new Set( (process.env.GITHUB_WEBHOOK_REFS || 'refs/heads/main') .split(',') @@ -198,6 +201,49 @@ async function ensureBuildDependencies() { } } +function copyMissingFiles(fromDir, toDir) { + if (!fs.existsSync(fromDir) || !fs.existsSync(toDir)) return + + for (const entry of fs.readdirSync(fromDir, { withFileTypes: true })) { + const source = path.join(fromDir, entry.name) + const target = path.join(toDir, entry.name) + + if (entry.isDirectory()) { + 
fs.mkdirSync(target, { recursive: true }) + copyMissingFiles(source, target) + continue + } + + if (!fs.existsSync(target)) { + fs.copyFileSync(source, target) + } + } +} + +function promoteBuiltDist() { + const previousAssetsDir = path.join(DIST_DIR, 'assets') + const nextAssetsDir = path.join(NEXT_DIST_DIR, 'assets') + let movedCurrentDist = false + + copyMissingFiles(previousAssetsDir, nextAssetsDir) + + fs.rmSync(PREVIOUS_DIST_DIR, { recursive: true, force: true }) + + try { + if (fs.existsSync(DIST_DIR)) { + fs.renameSync(DIST_DIR, PREVIOUS_DIST_DIR) + movedCurrentDist = true + } + + fs.renameSync(NEXT_DIST_DIR, DIST_DIR) + } catch (error) { + if (movedCurrentDist && !fs.existsSync(DIST_DIR) && fs.existsSync(PREVIOUS_DIST_DIR)) { + fs.renameSync(PREVIOUS_DIST_DIR, DIST_DIR) + } + throw error + } +} + function postDiscordWebhook(payload) { if (!DISCORD_WEBHOOK_URL) return Promise.resolve() @@ -360,7 +406,8 @@ async function deploy(push) { await run('git', ['pull', '--ff-only']) diff = await deployDiff(push) await ensureBuildDependencies() - await run('npm', ['run', 'build']) + await run('npm', ['run', 'build', '--', '--outDir', 'dist-next']) + promoteBuiltDist() for (const target of RESTART_TARGETS) { await run('pm2', ['reload', target, '--update-env'])