Add TTL alert task with webhook notifications and a shared get_recent_ttl_stats helper

This commit is contained in:
Clippii
2026-05-17 12:11:11 +01:00
parent ff379c7843
commit d316d8fd61
4 changed files with 159 additions and 27 deletions
+11 -25
View File
@@ -53,7 +53,7 @@ from .game_api import (
obtain_clan_new_points, obtain_clan_new_points,
obtain_clans_leaderboard, obtain_clans_leaderboard,
) )
from .health import init_health, get_health_snapshot from .health import init_health, get_health_snapshot, get_recent_ttl_stats
from .utils import t, guild_lang from .utils import t, guild_lang
from .lux_apis import fetch_replay_by_id from .lux_apis import fetch_replay_by_id
from .meta_manager import ( from .meta_manager import (
@@ -8823,17 +8823,10 @@ async def bot_status_public(interaction: discord.Interaction):
avg_delay: int | None = None avg_delay: int | None = None
sample_size = 0 sample_size = 0
try: try:
async with aiosqlite.connect(SQ_BATTLES_DB_PATH, timeout=10.0) as db: stats = await get_recent_ttl_stats(limit=30)
rows = list(await db.execute_fetchall( avg_delay = stats["avg_delay"]
"SELECT endtime_unix, received_unix FROM match_summary " sample_size = stats["sample_size"]
"WHERE received_unix IS NOT NULL AND endtime_unix IS NOT NULL " last_received_ts = stats["last_received_ts"]
"ORDER BY endtime_unix DESC LIMIT 30"
))
if rows:
delays = [max(int(r[1]) - int(r[0]), 0) for r in rows]
avg_delay = int(sum(delays) / len(delays))
sample_size = len(delays)
last_received_ts = max(int(r[1]) for r in rows)
except Exception: except Exception:
logging.exception("Failed to compute /bot-status TTL stats") logging.exception("Failed to compute /bot-status TTL stats")
@@ -8989,23 +8982,16 @@ async def bot_status(interaction: discord.Interaction):
# Avg TTL (Spectra receive delay) for the last 30 games # Avg TTL (Spectra receive delay) for the last 30 games
try: try:
async with aiosqlite.connect(SQ_BATTLES_DB_PATH, timeout=10.0) as db: ttl_stats = await get_recent_ttl_stats(limit=30)
ttl_rows = list(await db.execute_fetchall( if ttl_stats["avg_delay"] is not None:
"SELECT endtime_unix, received_unix FROM match_summary " a_min, a_sec = divmod(ttl_stats["avg_delay"], 60)
"WHERE received_unix IS NOT NULL AND endtime_unix IS NOT NULL " mn_min, mn_sec = divmod(ttl_stats["min_delay"], 60)
"ORDER BY endtime_unix DESC LIMIT 30" mx_min, mx_sec = divmod(ttl_stats["max_delay"], 60)
))
if ttl_rows:
delays = [max(int(r[1]) - int(r[0]), 0) for r in ttl_rows]
avg = sum(delays) / len(delays)
a_min, a_sec = divmod(int(avg), 60)
mn_min, mn_sec = divmod(min(delays), 60)
mx_min, mx_sec = divmod(max(delays), 60)
ttl_value = ( ttl_value = (
f"**Avg:** {a_min}m{a_sec:02d}s • " f"**Avg:** {a_min}m{a_sec:02d}s • "
f"**Min:** {mn_min}m{mn_sec:02d}s • " f"**Min:** {mn_min}m{mn_sec:02d}s • "
f"**Max:** {mx_min}m{mx_sec:02d}s • " f"**Max:** {mx_min}m{mx_sec:02d}s • "
f"**N:** {len(delays)}" f"**N:** {ttl_stats['sample_size']}"
) )
else: else:
ttl_value = t("en", "dev.health_never") ttl_value = t("en", "dev.health_never")
+31 -1
View File
@@ -14,9 +14,10 @@ from pathlib import Path
# Third-Party Library Imports # Third-Party Library Imports
import aiofiles import aiofiles
import aiosqlite
# Local Module Imports # Local Module Imports
from .utils import STORAGE_DIR, get_bot from .utils import STORAGE_DIR, SQ_BATTLES_DB_PATH, get_bot
HEALTH_PATH = STORAGE_DIR / "bot_health.json" HEALTH_PATH = STORAGE_DIR / "bot_health.json"
@@ -130,3 +131,32 @@ async def get_health_snapshot() -> dict:
_health_state["games_processed_24h"] = games_24h _health_state["games_processed_24h"] = games_24h
return dict(_health_state) return dict(_health_state)
async def get_recent_ttl_stats(limit: int = 30) -> dict:
    """Summarise the receive delay of the newest rows in ``match_summary``.

    A row's delay is ``received_unix - endtime_unix``, clamped at zero so a
    negative difference never pulls the statistics down. Rows where either
    column is NULL are excluded by the query.

    Args:
        limit: Maximum number of rows to sample, ordered by ``endtime_unix``
            descending.

    Returns:
        A dict with the keys ``sample_size``, ``avg_delay``, ``min_delay``,
        ``max_delay`` and ``last_received_ts``. When no row qualifies,
        ``sample_size`` is 0 and every other value is ``None``.
    """
    query = (
        "SELECT endtime_unix, received_unix FROM match_summary "
        "WHERE received_unix IS NOT NULL AND endtime_unix IS NOT NULL "
        "ORDER BY endtime_unix DESC LIMIT ?"
    )
    async with aiosqlite.connect(SQ_BATTLES_DB_PATH, timeout=10.0) as db:
        fetched = list(await db.execute_fetchall(query, (limit,)))

    # Start from the "no data" answer and fill it in only when rows exist.
    summary = {
        "sample_size": 0,
        "avg_delay": None,
        "min_delay": None,
        "max_delay": None,
        "last_received_ts": None,
    }
    if fetched:
        samples = [(int(ended), int(received)) for ended, received in fetched]
        delays = [max(received - ended, 0) for ended, received in samples]
        count = len(delays)
        summary["sample_size"] = count
        summary["avg_delay"] = int(sum(delays) / count)
        summary["min_delay"] = min(delays)
        summary["max_delay"] = max(delays)
        summary["last_received_ts"] = max(received for _, received in samples)
    return summary
+116 -1
View File
@@ -8,20 +8,23 @@ on configured intervals.
# Standard Library Imports # Standard Library Imports
import asyncio import asyncio
import os
import json import json
import logging import logging
import shutil import shutil
import time
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
# Third-Party Library Imports # Third-Party Library Imports
import aiohttp
from discord.ext import tasks from discord.ext import tasks
# Local Module Imports # Local Module Imports
from . import lux_apis from . import lux_apis
from .autologging import handle_ws_replays, handle_gob_message from .autologging import handle_ws_replays, handle_gob_message
from .health import record_task_run, write_heartbeat from .health import get_recent_ttl_stats, record_task_run, write_heartbeat
from .meta_manager import process_all_players, sync_all_guild_metas from .meta_manager import process_all_players, sync_all_guild_metas
from .task_executors import ( from .task_executors import (
execute_ldb_alarm_task, execute_ldb_alarm_task,
@@ -50,6 +53,116 @@ async def _record(task_name: str, success: bool, error: str = ""):
await record_task_run(task_name, success, error) await record_task_run(task_name, success, error)
def _format_duration(seconds: int) -> str:
    """Render a duration given in seconds as ``"<minutes>m <SS>s"``.

    The input is truncated with ``int()`` first; the seconds part is
    zero-padded to two digits.
    """
    total = int(seconds)
    whole_minutes = total // 60
    leftover = total % 60
    return f"{whole_minutes}m {leftover:02d}s"
async def _send_ttl_alert_webhook(content: str) -> bool:
    """Post *content* as JSON to the TTL alert webhook.

    The target URL is read from the ``SREBOT_TTL_ALERT_WEBHOOK_URL``
    environment variable on every call.

    Args:
        content: Message text, sent as the ``content`` field of the payload.

    Returns:
        ``True`` when the webhook answers with a 2xx status. ``False`` when
        the URL is unset/blank or the response carries any other status.
        Exceptions raised by the HTTP client are not caught here.
    """
    target = os.environ.get("SREBOT_TTL_ALERT_WEBHOOK_URL", "").strip()
    if not target:
        logging.warning("[TTL-ALERT] SREBOT_TTL_ALERT_WEBHOOK_URL is not configured.")
        return False

    payload = {"content": content}
    request_timeout = aiohttp.ClientTimeout(total=15)
    async with aiohttp.ClientSession(timeout=request_timeout) as session:
        async with session.post(target, json=payload) as response:
            status = response.status
            if status < 200 or status >= 300:
                # Keep the logged body short; only the first 500 chars are kept.
                body = await response.text()
                logging.error("[TTL-ALERT] Webhook failed with HTTP %s: %s", status, body[:500])
                return False
            return True
# Average TTL (seconds) above which the alert hold timer starts running.
_TTL_ALERT_HIGH_THRESHOLD_SECONDS = 20 * 60
# Average TTL (seconds) below which an active alert is reported as recovered.
_TTL_ALERT_RECOVERY_THRESHOLD_SECONDS = 10 * 60
# How long (seconds) the average must stay above the high threshold before
# the alert webhook is sent.
_TTL_ALERT_HOLD_SECONDS = 10 * 60
# Timestamp at which the average was first seen above the high threshold;
# None while the average is not elevated.
_ttl_alert_high_since: Optional[float] = None
# True between a successfully sent high-TTL alert and its recovery message.
_ttl_alert_active = False
async def _execute_ttl_alert_check() -> None:
    """Run one pass of the TTL alert state machine.

    Reads the receive-delay stats for the last 30 games and updates the
    module-level ``_ttl_alert_high_since`` / ``_ttl_alert_active`` state:

    * No data: the hold timer is cleared and nothing is sent.
    * Alert not active and average above the high threshold: the first
      sighting starts the hold timer; once the average has stayed high for
      ``_TTL_ALERT_HOLD_SECONDS`` the alert webhook is sent. The alert is
      only marked active when the webhook succeeded, so a failed send is
      retried on the next pass.
    * Alert not active and average not above the high threshold: the hold
      timer is cleared.
    * Alert active and average below the recovery threshold: the recovery
      webhook is sent; state is reset only when it succeeded. Between the
      two thresholds the alert simply stays active.
    """
    global _ttl_alert_high_since, _ttl_alert_active

    stats = await get_recent_ttl_stats(limit=30)
    avg_delay = stats["avg_delay"]
    sample_size = stats["sample_size"]
    last_received_ts = stats["last_received_ts"]

    if avg_delay is None:
        _ttl_alert_high_since = None
        logging.info("[TTL-ALERT] No TTL data available.")
        return

    # Elapsed-time bookkeeping uses the monotonic clock so wall-clock steps
    # (NTP corrections, manual changes) cannot shorten or stretch the hold.
    now = time.monotonic()

    if not _ttl_alert_active:
        if avg_delay > _TTL_ALERT_HIGH_THRESHOLD_SECONDS:
            if _ttl_alert_high_since is None:
                # First high reading: start the hold timer, do not alert yet.
                _ttl_alert_high_since = now
                logging.warning(
                    "[TTL-ALERT] Avg TTL above threshold: %s across %s games.",
                    _format_duration(avg_delay),
                    sample_size,
                )
                return

            # NOTE(review): the hold period equals the 10-minute loop interval
            # of ttl_alert_task, so scheduling jitter may defer the alert to
            # the third consecutive high reading - confirm this is acceptable.
            if now - _ttl_alert_high_since >= _TTL_ALERT_HOLD_SECONDS:
                content = (
                    ":warning: **SRE Bot TTL alert**\n"
                    f"Average TTL across the last {sample_size} games is "
                    f"**{_format_duration(avg_delay)}**, and has stayed above "
                    f"{_format_duration(_TTL_ALERT_HIGH_THRESHOLD_SECONDS)} for at least "
                    f"{_format_duration(_TTL_ALERT_HOLD_SECONDS)}.\n"
                    "Historical game messages may be delayed or out of date."
                )
                if last_received_ts:
                    content += f"\nLast game received: <t:{int(last_received_ts)}:R>."

                sent = await _send_ttl_alert_webhook(content)
                if sent:
                    _ttl_alert_active = True
                    logging.warning("[TTL-ALERT] High TTL webhook sent.")
            return

        # Average is back at or under the high threshold before the hold
        # elapsed: forget the pending timer.
        _ttl_alert_high_since = None
        return

    if avg_delay < _TTL_ALERT_RECOVERY_THRESHOLD_SECONDS:
        content = (
            ":white_check_mark: **SRE Bot TTL recovered**\n"
            f"Average TTL across the last {sample_size} games is now "
            f"**{_format_duration(avg_delay)}**, below "
            f"{_format_duration(_TTL_ALERT_RECOVERY_THRESHOLD_SECONDS)}. "
            "Back at normal operations."
        )
        if last_received_ts:
            content += f"\nLast game received: <t:{int(last_received_ts)}:R>."

        sent = await _send_ttl_alert_webhook(content)
        if sent:
            _ttl_alert_active = False
            _ttl_alert_high_since = None
            logging.info("[TTL-ALERT] Recovery webhook sent.")
# ============================================================================
# TTL ALERT TASK
# ============================================================================
@tasks.loop(minutes=10)
async def ttl_alert_task():
    """Alert when recent game receive TTL is elevated for a sustained period.

    Runs the TTL alert check and records the outcome of the run under the
    task name ``ttl_alert``. Any exception from the check is logged and
    recorded as a failed run instead of propagating out of the loop body.
    """
    try:
        await _execute_ttl_alert_check()
        await _record("ttl_alert", True)
    except Exception as e:
        # Log first, with the traceback, so the original failure is still
        # visible even if recording the failed run raises as well.
        logging.exception("[TTL-ALERT] Failed to check TTL alert state: %s", e)
        await _record("ttl_alert", False, str(e))
@ttl_alert_task.before_loop
async def before_ttl_alert_task():
    """Hold back the first TTL alert iteration until the bot reports ready."""
    bot = get_bot()
    await bot.wait_until_ready()
# ============================================================================ # ============================================================================
# LEADERBOARD ALARM TASK # LEADERBOARD ALARM TASK
# ============================================================================ # ============================================================================
@@ -516,6 +629,7 @@ async def start_all_tasks():
replay_cleanup_task.start() replay_cleanup_task.start()
health_heartbeat_task.start() health_heartbeat_task.start()
weekly_br_report_task.start() weekly_br_report_task.start()
ttl_alert_task.start()
# Phase 2: WebSocket listeners # Phase 2: WebSocket listeners
ws_autolog_task.start() ws_autolog_task.start()
ws_gob_task.start() ws_gob_task.start()
@@ -541,3 +655,4 @@ def stop_all_tasks():
sync_guild_metas_task.cancel() sync_guild_metas_task.cancel()
health_heartbeat_task.cancel() health_heartbeat_task.cancel()
weekly_br_report_task.cancel() weekly_br_report_task.cancel()
ttl_alert_task.cancel()
+1
View File
@@ -35,6 +35,7 @@
SREBOT_API_PORT=6000 SREBOT_API_PORT=6000
SREBOT_WEB_PORT=3001 SREBOT_WEB_PORT=3001
SREBOT_WEBHOOK_PORT=9000 SREBOT_WEBHOOK_PORT=9000
SREBOT_TTL_ALERT_WEBHOOK_URL=https://discord.com/api/webhooks/...
SREBOT_EXTERNAL_HOST=0.0.0.0 SREBOT_EXTERNAL_HOST=0.0.0.0
SREBOT_EXTERNAL_PORT=18081 SREBOT_EXTERNAL_PORT=18081
SREBOT_EXTERNAL_BEARER_TOKEN=your_external_bridge_token # Optional, protects the bridge API and websocket SREBOT_EXTERNAL_BEARER_TOKEN=your_external_bridge_token # Optional, protects the bridge API and websocket