aggressive data collection :PP

This commit is contained in:
2026-05-14 22:52:33 +01:00
parent 98f374a300
commit ef10da8b0b
6 changed files with 735 additions and 34 deletions
+338 -25
View File
@@ -1,4 +1,5 @@
const fs = require('node:fs')
const crypto = require('node:crypto')
const http = require('node:http')
const https = require('node:https')
const os = require('node:os')
@@ -42,6 +43,10 @@ const UPTIME_STORAGE_DIR = process.env.UPTIME_STORAGE_DIR || '~/tsswebstorage'
// File name of the uptime SQLite database; joined with the storage directory at startup for logging.
const UPTIME_DATABASE_FILE = process.env.UPTIME_DATABASE_FILE || 'uptime.sqlite'
// Interval between uptime samples in milliseconds (default 30 minutes).
const UPTIME_SAMPLE_INTERVAL_MS = Number(process.env.UPTIME_SAMPLE_INTERVAL_MS || 30 * 60 * 1000)
// Presumably the maximum number of uptime samples kept/returned (336 = 7 days at 30-minute samples) — TODO confirm against the uptime code.
const UPTIME_HISTORY_LIMIT = Number(process.env.UPTIME_HISTORY_LIMIT || 336)
// File name of the viewer-analytics SQLite database; stored in the same directory as the uptime database.
const ANALYTICS_DATABASE_FILE = process.env.ANALYTICS_DATABASE_FILE || 'viewers.sqlite'
// Viewer events older than this many days are deleted by purgeOldAnalytics.
const ANALYTICS_RETENTION_DAYS = Number(process.env.ANALYTICS_RETENTION_DAYS || 30)
// A session counts as "active" on the dashboard if seen within this many seconds;
// active_viewers rows are purged after three times this window.
const ANALYTICS_ACTIVE_WINDOW_SECONDS = Number(process.env.ANALYTICS_ACTIVE_WINDOW_SECONDS || 75)
// Salt mixed into the SHA-256 hash of client IPs (see hashIp).
// NOTE(review): the fallback is a publicly known placeholder; hashes made with it are easy to
// brute-force back to IPs. Set ANALYTICS_SALT to a secret value in production.
const ANALYTICS_SALT = process.env.ANALYTICS_SALT || 'change-me-viewer-salt'
// Presumably the lifetime of cached API responses in milliseconds — usage is outside this view.
const API_CACHE_TTL_MS = Number(process.env.API_CACHE_TTL_MS || 15000)
// Presumably the rate-limit window length and the request cap per window — usage is outside this view.
const API_RATE_LIMIT_WINDOW_MS = Number(process.env.API_RATE_LIMIT_WINDOW_MS || 60000)
const API_RATE_LIMIT_MAX = Number(process.env.API_RATE_LIMIT_MAX || 120)
@@ -49,6 +54,7 @@ const DIST_DIR = path.join(__dirname, 'dist')
// Hard limits. The first three are used outside this view; their names suggest caps on
// team-name input length, API cache size and tracked rate-limit keys — TODO confirm.
const MAX_TEAM_NAME_LENGTH = 80
const MAX_CACHE_ENTRIES = 200
const MAX_RATE_LIMIT_KEYS = 1000
// Largest analytics request body readJsonBody accepts before rejecting (16 KiB).
const MAX_ANALYTICS_BODY_BYTES = 16 * 1024
const mimeTypes = {
'.css': 'text/css; charset=utf-8',
@@ -76,6 +82,7 @@ const jsonHeaders = {
// Module-level mutable state shared by the request handlers.
// apiCache / rateLimits: in-memory maps, presumably keyed by request and client — usage is outside this view.
const apiCache = new Map()
const rateLimits = new Map()
// Lazily opened SQLite handles; null until the matching ensure*Db() call creates them.
let uptimeDb = null
let analyticsDb = null
// Presumably the most recent uptime sample held in memory — usage is outside this view.
let latestUptimeSnapshot = null
function sendJson(res, status, body, headers = {}) {
@@ -138,6 +145,67 @@ function uptimeStoragePath() {
return path.resolve(expandHome(UPTIME_STORAGE_DIR))
}
/**
 * Lazily opens the viewer-analytics SQLite database and makes sure its schema exists.
 *
 * The database file lives in the uptime storage directory (created if missing) under
 * ANALYTICS_DATABASE_FILE. The opened handle is cached in the module-level `analyticsDb`
 * and returned on every later call.
 *
 * The handle is only cached once the pragma and schema statements have succeeded. If
 * setup throws, the half-initialised handle is closed and the error is rethrown, so the
 * next call retries instead of reusing a database without tables.
 *
 * @returns {object} The open database handle (assumes `Database` is a better-sqlite3
 *   style constructor exposing `pragma`, `exec` and `close` — confirm against the import).
 * @throws Propagates any error from creating the directory, opening the file or
 *   creating the schema.
 */
function ensureAnalyticsDb() {
  if (analyticsDb) return analyticsDb

  const storageDir = uptimeStoragePath()
  fs.mkdirSync(storageDir, { recursive: true })

  const db = new Database(path.join(storageDir, ANALYTICS_DATABASE_FILE))
  try {
    db.pragma('journal_mode = WAL')
    db.exec(`
      create table if not exists viewer_events (
        id integer primary key autoincrement,
        occurred_at text not null default (datetime('now')),
        visitor_id text not null,
        session_id text not null,
        ip_hash text not null,
        event_type text not null,
        page_path text not null,
        page_title text not null,
        referrer text not null default '',
        user_agent text not null default '',
        browser text not null default 'Unknown',
        os text not null default 'Unknown',
        device text not null default 'Desktop',
        screen text not null default '',
        language text not null default '',
        timezone text not null default '',
        consent text not null default 'analytics',
        metadata text not null default '{}'
      );
      create table if not exists active_viewers (
        session_id text primary key,
        visitor_id text not null,
        ip_hash text not null,
        first_seen_at text not null,
        last_seen_at text not null,
        page_path text not null,
        page_title text not null,
        referrer text not null default '',
        user_agent text not null default '',
        browser text not null default 'Unknown',
        os text not null default 'Unknown',
        device text not null default 'Desktop',
        screen text not null default '',
        language text not null default '',
        timezone text not null default ''
      );
      create index if not exists viewer_events_occurred_at_idx
        on viewer_events (occurred_at desc);
      create index if not exists viewer_events_page_path_idx
        on viewer_events (page_path, occurred_at desc);
      create index if not exists active_viewers_last_seen_at_idx
        on active_viewers (last_seen_at desc);
    `)
  } catch (error) {
    // Release the file handle; the setup error is the one worth reporting.
    try {
      db.close()
    } catch {
      // Ignored on purpose: a close failure must not mask the original error.
    }
    throw error
  }

  analyticsDb = db
  return analyticsDb
}
function ensureUptimeDb() {
if (uptimeDb) return uptimeDb
@@ -308,6 +376,221 @@ function clientIp(req) {
return req.socket.remoteAddress || 'unknown'
}
/**
 * Turns a client IP into a salted SHA-256 digest so the raw address is never stored.
 *
 * @param {string} ip - Client address as returned by clientIp().
 * @returns {string} Hex digest of `${ANALYTICS_SALT}:${ip}`.
 */
function hashIp(ip) {
  const hasher = crypto.createHash('sha256')
  hasher.update(`${ANALYTICS_SALT}:${ip}`)
  return hasher.digest('hex')
}
/**
 * Normalises an untrusted value into a short, single-line string.
 *
 * Falsy values become the empty string; anything else is stringified. ASCII control
 * characters (U+0000–U+001F and U+007F) are removed, surrounding whitespace is trimmed,
 * and the result is cut to `maxLength` UTF-16 code units.
 *
 * @param {*} value - Raw input.
 * @param {number} [maxLength=200] - Maximum length of the returned string.
 * @returns {string} The cleaned text.
 */
function sanitizeText(value, maxLength = 200) {
  const text = value ? String(value) : ''
  const withoutControls = text.replace(/[\u0000-\u001f\u007f]/g, '')
  const trimmed = withoutControls.trim()
  return trimmed.slice(0, maxLength)
}
/**
 * Cleans a client-reported page path.
 *
 * The value is passed through sanitizeText with a 300-character cap; anything that
 * does not start with "/" after cleaning is replaced by the root path.
 *
 * @param {*} value - Raw path from the analytics payload.
 * @returns {string} A path beginning with "/".
 */
function sanitizePath(value) {
  const cleaned = sanitizeText(value, 300)
  return cleaned.startsWith('/') ? cleaned : '/'
}
/**
 * Derives coarse browser, operating-system and device labels from a User-Agent string.
 *
 * Detection is a fixed sequence of regex tests; the first match in each group wins.
 * Order matters: Edge and Opera UAs also contain "Chrome/", Android UAs also contain
 * "Linux", and iOS UAs also contain "Mac OS X".
 *
 * Tablet markers are tested before mobile markers. iPad UAs carry a "Mobile/<build>"
 * token and Android tablets carry "Android", so testing mobile first would label
 * every tablet as "Mobile".
 *
 * @param {string} [userAgent=''] - Raw User-Agent header value.
 * @returns {{browser: string, os: string, device: string}} Labels; unmatched values
 *   fall back to 'Unknown' (browser, os) and 'Desktop' (device).
 */
function parseClient(userAgent = '') {
  const ua = String(userAgent)

  let browser = 'Unknown'
  if (/Edg\//.test(ua)) browser = 'Microsoft Edge'
  else if (/OPR\//.test(ua)) browser = 'Opera'
  else if (/Firefox\//.test(ua)) browser = 'Firefox'
  else if (/Chrome\//.test(ua) && !/Chromium\//.test(ua)) browser = 'Chrome'
  else if (/Safari\//.test(ua) && /Version\//.test(ua)) browser = 'Safari'

  let osName = 'Unknown'
  if (/Windows NT/.test(ua)) osName = 'Windows'
  else if (/Android/.test(ua)) osName = 'Android'
  else if (/(iPhone|iPad|iPod)/.test(ua)) osName = 'iOS'
  else if (/Mac OS X/.test(ua)) osName = 'macOS'
  else if (/Linux/.test(ua)) osName = 'Linux'

  let device = 'Desktop'
  if (/iPad|Tablet/.test(ua)) device = 'Tablet'
  else if (/Mobi|Android|iPhone|iPod/.test(ua)) device = 'Mobile'

  return { browser, os: osName, device }
}
/**
 * Buffers a request body and parses it as a JSON object.
 *
 * @param {import('node:http').IncomingMessage} req - Incoming request stream.
 * @returns {Promise<object>} Resolves with the parsed object, or `{}` for an empty body.
 *   Rejects with:
 *   - 'Request body too large' once more than MAX_ANALYTICS_BODY_BYTES have arrived
 *     (the request is destroyed so no further data is read);
 *   - 'Invalid JSON body' when the body is not valid JSON;
 *   - 'JSON body must be an object' when the JSON is valid but is null, an array or a
 *     primitive, so callers can safely read properties off the result;
 *   - the stream's own error if the request emits 'error'.
 */
function readJsonBody(req) {
  return new Promise((resolve, reject) => {
    const chunks = []
    let size = 0

    req.on('data', (chunk) => {
      size += chunk.length
      if (size > MAX_ANALYTICS_BODY_BYTES) {
        reject(new Error('Request body too large'))
        req.destroy()
        return
      }
      chunks.push(chunk)
    })

    req.on('end', () => {
      let parsed
      try {
        const body = Buffer.concat(chunks).toString('utf8')
        parsed = body ? JSON.parse(body) : {}
      } catch {
        reject(new Error('Invalid JSON body'))
        return
      }
      // `null`, arrays and primitives are valid JSON but not a usable payload.
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        reject(new Error('JSON body must be an object'))
        return
      }
      resolve(parsed)
    })

    req.on('error', reject)
  })
}
/**
 * Deletes analytics rows that have aged out.
 *
 * - viewer_events older than ANALYTICS_RETENTION_DAYS days
 * - active_viewers not seen for three times ANALYTICS_ACTIVE_WINDOW_SECONDS
 *
 * Timestamps in both tables are written as ISO-8601 strings from
 * `Date.prototype.toISOString()` ("2026-05-14T22:52:33.000Z"). The cutoffs are therefore
 * computed in JavaScript in the same format. SQLite's `datetime()` yields
 * "2026-05-14 22:52:33", and because "T" sorts after " " in a text comparison, rows
 * from the cutoff's own UTC date would never compare as older.
 *
 * @param {object} db - Open analytics database handle exposing `prepare(sql).run(...)`.
 */
function purgeOldAnalytics(db) {
  const nowMs = Date.now()
  // A misconfigured (non-numeric) setting yields a null cutoff; `col < NULL` matches
  // nothing, so a bad value disables purging instead of throwing.
  const isoCutoff = (ageMs) => {
    const cutoff = new Date(nowMs - ageMs)
    return Number.isNaN(cutoff.getTime()) ? null : cutoff.toISOString()
  }

  db.prepare(`
    delete from viewer_events
    where occurred_at < ?
  `).run(isoCutoff(ANALYTICS_RETENTION_DAYS * 24 * 60 * 60 * 1000))

  db.prepare(`
    delete from active_viewers
    where last_seen_at < ?
  `).run(isoCutoff(ANALYTICS_ACTIVE_WINDOW_SECONDS * 3 * 1000))
}
/**
 * Validates an analytics payload and stores it.
 *
 * One row is appended to viewer_events and the session's row in active_viewers is
 * inserted or refreshed (first_seen_at is kept on conflict). Both rows share the same
 * ISO-8601 timestamp.
 *
 * Consent is checked before anything else: without `consent === 'analytics'` the
 * database is not opened or purged and the client IP is not hashed.
 *
 * @param {import('node:http').IncomingMessage} req - Request; supplies the User-Agent
 *   header and the client IP.
 * @param {object} payload - Parsed JSON body sent by the browser. All fields are
 *   treated as untrusted and are sanitised/truncated before storage.
 * @throws {Error} 'Analytics consent is required' when the payload is missing, is not
 *   an object, or does not carry `consent: 'analytics'`.
 */
function recordViewerEvent(req, payload) {
  if (!payload || typeof payload !== 'object' || payload.consent !== 'analytics') {
    throw new Error('Analytics consent is required')
  }

  const db = ensureAnalyticsDb()
  purgeOldAnalytics(db)

  const headerUserAgent = req.headers['user-agent']
  const serverClient = parseClient(headerUserAgent || '')
  const event = {
    // Missing or blank identifiers get a fresh random id so the row is still valid.
    visitor_id: sanitizeText(payload.visitor_id, 80) || crypto.randomUUID(),
    session_id: sanitizeText(payload.session_id, 80) || crypto.randomUUID(),
    ip_hash: hashIp(clientIp(req)),
    // Unknown event types are stored as heartbeats.
    event_type: ['page_view', 'heartbeat', 'consent'].includes(payload.event_type)
      ? payload.event_type
      : 'heartbeat',
    page_path: sanitizePath(payload.page_path),
    page_title: sanitizeText(payload.page_title, 160),
    referrer: sanitizeText(payload.referrer, 300),
    // The request header wins over the client-reported user agent.
    user_agent: sanitizeText(headerUserAgent || payload.user_agent, 500),
    // Client-reported labels win; server-side UA parsing is the fallback.
    browser: sanitizeText(payload.browser || serverClient.browser, 80),
    os: sanitizeText(payload.os || serverClient.os, 80),
    device: sanitizeText(payload.device || serverClient.device, 80),
    screen: sanitizeText(payload.screen, 40),
    language: sanitizeText(payload.language, 40),
    timezone: sanitizeText(payload.timezone, 80),
    consent: 'analytics',
    metadata: JSON.stringify(payload.metadata && typeof payload.metadata === 'object' ? payload.metadata : {}),
  }

  const now = new Date().toISOString()

  db.prepare(`
    insert into viewer_events
      (occurred_at, visitor_id, session_id, ip_hash, event_type, page_path, page_title,
       referrer, user_agent, browser, os, device, screen, language, timezone, consent, metadata)
    values
      (@occurred_at, @visitor_id, @session_id, @ip_hash, @event_type, @page_path, @page_title,
       @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone, @consent, @metadata)
  `).run({ ...event, occurred_at: now })

  db.prepare(`
    insert into active_viewers
      (session_id, visitor_id, ip_hash, first_seen_at, last_seen_at, page_path, page_title,
       referrer, user_agent, browser, os, device, screen, language, timezone)
    values
      (@session_id, @visitor_id, @ip_hash, @now, @now, @page_path, @page_title,
       @referrer, @user_agent, @browser, @os, @device, @screen, @language, @timezone)
    on conflict(session_id) do update set
      last_seen_at = excluded.last_seen_at,
      page_path = excluded.page_path,
      page_title = excluded.page_title,
      referrer = excluded.referrer,
      user_agent = excluded.user_agent,
      browser = excluded.browser,
      os = excluded.os,
      device = excluded.device,
      screen = excluded.screen,
      language = excluded.language,
      timezone = excluded.timezone
  `).run({ ...event, now })
}
/**
 * Builds the viewer-analytics dashboard payload.
 *
 * Runs a purge first, then reports:
 * - `active`: up to 100 sessions seen within ANALYTICS_ACTIVE_WINDOW_SECONDS, with
 *   session and visitor ids truncated to 8 characters and no IP hash;
 * - `top_pages`: the 12 most viewed pages in the last 24 hours;
 * - `clients`: the 12 most common browser/os/device combinations in the last 24 hours;
 * - `totals`: event, distinct-visitor and page-view counts for the last 24 hours plus
 *   the number of active sessions.
 *
 * Stored timestamps are ISO-8601 strings from `toISOString()`, so every cutoff is
 * computed in JavaScript in that same format and bound as a parameter. Comparing
 * against SQLite's `datetime()` output ("YYYY-MM-DD HH:MM:SS") is a text comparison in
 * which "T" sorts after " ", which stretches each window to the whole UTC day of the
 * cutoff.
 *
 * @returns {object} JSON-serialisable dashboard data.
 * @throws Propagates database errors from the underlying queries.
 */
function viewerDashboard() {
  const db = ensureAnalyticsDb()
  purgeOldAnalytics(db)

  const nowMs = Date.now()
  // A non-numeric window setting gives a null cutoff; `col >= NULL` matches no rows.
  const isoCutoff = (ageMs) => {
    const cutoff = new Date(nowMs - ageMs)
    return Number.isNaN(cutoff.getTime()) ? null : cutoff.toISOString()
  }
  const activeSince = isoCutoff(ANALYTICS_ACTIVE_WINDOW_SECONDS * 1000)
  const lastDaySince = isoCutoff(24 * 60 * 60 * 1000)

  const active = db.prepare(`
    select session_id, visitor_id, first_seen_at, last_seen_at, page_path, page_title,
           referrer, browser, os, device, screen, language, timezone
    from active_viewers
    where last_seen_at >= ?
    order by last_seen_at desc
    limit 100
  `).all(activeSince).map((row) => ({
    // Only a short prefix of each identifier is exposed.
    session: row.session_id.slice(0, 8),
    visitor: row.visitor_id.slice(0, 8),
    first_seen_at: row.first_seen_at,
    last_seen_at: row.last_seen_at,
    page_path: row.page_path,
    page_title: row.page_title,
    referrer: row.referrer,
    browser: row.browser,
    os: row.os,
    device: row.device,
    screen: row.screen,
    language: row.language,
    timezone: row.timezone,
  }))

  const topPages = db.prepare(`
    select page_path, page_title, count(*) as views
    from viewer_events
    where event_type = 'page_view'
      and occurred_at >= ?
    group by page_path, page_title
    order by views desc, page_path asc
    limit 12
  `).all(lastDaySince)

  const clients = db.prepare(`
    select browser, os, device, count(*) as events
    from viewer_events
    where occurred_at >= ?
    group by browser, os, device
    order by events desc
    limit 12
  `).all(lastDaySince)

  const totals = db.prepare(`
    select
      count(*) as events_24h,
      count(distinct visitor_id) as visitors_24h,
      sum(case when event_type = 'page_view' then 1 else 0 end) as page_views_24h
    from viewer_events
    where occurred_at >= ?
  `).get(lastDaySince)

  return {
    active_window_seconds: ANALYTICS_ACTIVE_WINDOW_SECONDS,
    generated_at: new Date().toISOString(),
    active,
    top_pages: topPages,
    clients,
    totals: {
      active_now: active.length,
      // sum() is NULL when no rows match, hence the fallbacks to 0.
      events_24h: totals?.events_24h || 0,
      visitors_24h: totals?.visitors_24h || 0,
      page_views_24h: totals?.page_views_24h || 0,
    },
    privacy: {
      retention_days: ANALYTICS_RETENTION_DAYS,
      stores_ip_hashes: true,
      exposes_raw_ip: false,
    },
  }
}
function isRateLimited(req) {
const now = Date.now()
const ip = clientIp(req)
@@ -476,36 +759,66 @@ function serveStatic(req, res) {
})
}
http
.createServer((req, res) => {
if (req.url === '/health') {
sendJson(res, 200, { ok: true })
const server = http.createServer((req, res) => {
if (req.url === '/health') {
sendJson(res, 200, { ok: true })
return
}
if (req.method === 'GET' && req.url === '/api/uptime') {
uptimeHistory()
.then((data) => sendJson(res, 200, data))
.catch((error) => sendJson(res, 500, { error: 'Uptime history unavailable', detail: error.message }))
return
}
if (req.method === 'GET' && req.url === '/api/viewers') {
try {
sendJson(res, 200, viewerDashboard())
} catch (error) {
sendJson(res, 500, { error: 'Viewer analytics unavailable', detail: error.message })
}
return
}
if (req.method === 'POST' && req.url === '/api/viewers/event') {
if (!isSameOriginRequest(req)) {
sendJson(res, 403, { error: 'Analytics events are restricted to this site' })
return
}
if (req.method === 'GET' && req.url === '/api/uptime') {
uptimeHistory()
.then((data) => sendJson(res, 200, data))
.catch((error) => sendJson(res, 500, { error: 'Uptime history unavailable', detail: error.message }))
if (isRateLimited(req)) {
sendJson(res, 429, { error: 'Too many analytics events' })
return
}
if (req.method === 'OPTIONS' && req.url.startsWith('/api/')) {
sendJson(res, 403, { error: 'CORS requests are not allowed' })
return
}
readJsonBody(req)
.then((payload) => {
recordViewerEvent(req, payload)
send(res, 204, '', { 'cache-control': 'no-store' })
})
.catch((error) => sendJson(res, 400, { error: error.message }))
return
}
if (req.url.startsWith('/api/')) {
proxyRequest(req, res)
return
}
if (req.method === 'OPTIONS' && req.url.startsWith('/api/')) {
sendJson(res, 403, { error: 'CORS requests are not allowed' })
return
}
serveStatic(req, res)
})
.listen(PORT, '0.0.0.0', () => {
console.log(`tssbot-web serving http://localhost:${PORT}`)
console.log(`proxying API requests to ${API_UPSTREAM}`)
console.log(`sampling uptime every ${Math.round(UPTIME_SAMPLE_INTERVAL_MS / 60000)} minutes`)
console.log(`storing uptime snapshots in ${path.join(uptimeStoragePath(), UPTIME_DATABASE_FILE)}`)
startUptimeSampler()
})
if (req.url.startsWith('/api/')) {
proxyRequest(req, res)
return
}
serveStatic(req, res)
})
server.listen(PORT, '0.0.0.0', () => {
console.log(`tssbot-web serving http://localhost:${PORT}`)
console.log(`proxying API requests to ${API_UPSTREAM}`)
console.log(`sampling uptime every ${Math.round(UPTIME_SAMPLE_INTERVAL_MS / 60000)} minutes`)
console.log(`storing uptime snapshots in ${path.join(uptimeStoragePath(), UPTIME_DATABASE_FILE)}`)
console.log(`storing viewer analytics in ${path.join(uptimeStoragePath(), ANALYTICS_DATABASE_FILE)}`)
startUptimeSampler()
})