From 4b75ce1533ce5a6b5f112c9cf386623bc927439d Mon Sep 17 00:00:00 2001 From: NotSoToothless <67082114+FURRO404@users.noreply.github.com> Date: Sun, 14 Jun 2026 21:11:57 -0700 Subject: [PATCH] =?UTF-8?q?Auto=20merge=20dev=20=E2=86=92=20main=20(#1324)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update game files * update files and capture raw spectra payload --- BOT/autologging.py | 34 ++++++++++++++++++++++++++++++++++ BOT/task_executors.py | 2 +- BOT/utils.py | 13 +++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/BOT/autologging.py b/BOT/autologging.py index 65c7930..db49367 100644 --- a/BOT/autologging.py +++ b/BOT/autologging.py @@ -39,7 +39,9 @@ from .utils import ( STORAGE_DIR, CACHE_DIR, replay_data_path, + raw_replay_data_path, replay_session_dir, + STORE_RAW_REPLAY, SQ_BATTLES_DB_PATH, SQUADRONS_DB_PATH, blacklisted_guilds, @@ -399,6 +401,15 @@ def load_replay_data_from_disk(session_id: str): return None +def load_raw_replay_data_from_disk(session_id: str): + """Load the unmodified Spectra payload (RAW_replay_data.json.gz), pre-transform.""" + path = raw_replay_data_path(session_id) + if path.is_file(): + with gzip.open(path, "rt", encoding="utf-8") as f: + return json.load(f) + return None + + # ============================================================================ # WEBSOCKET HANDLERS # ============================================================================ @@ -441,6 +452,29 @@ async def process_ws_replays(replays: list[dict]): logging.info(f"[WS] Skipping non-sqb replay (type={replay_type!r})") continue + # Store the unmodified Spectra payload before transforming, so the raw + # data can be pulled and re-processed later. Keyed off the raw _id (the + # same value transform_to_local_format converts to the hex session id), + # so it lands in the canonical replay dir even if the transform fails. + if STORE_RAW_REPLAY: + try: + raw_id = replay.get("_id") if replay.get("_id") is not None else replay.get("id") + raw_hex_id = hex(int(raw_id)).replace("0x", "") if raw_id is not None else "" + except (ValueError, TypeError): + raw_hex_id = "" + if raw_hex_id: + raw_path = raw_replay_data_path(raw_hex_id) + if not raw_path.exists(): + try: + raw_path.parent.mkdir(parents=True, exist_ok=True) + raw_bytes = json.dumps(replay, ensure_ascii=False).encode("utf-8") + compressed_raw = await asyncio.to_thread(gzip.compress, raw_bytes) + async with aiofiles.open(raw_path, "wb") as f: + await f.write(compressed_raw) + logging.info(f"[WSS] Saved RAW {raw_hex_id} ({len(compressed_raw)} bytes compressed)") + except Exception as e: + logging.error(f"[WSS] Failed to save RAW replay {raw_hex_id}: {e}") + # Transform to local format first - it handles _id -> hex conversion wrapped = {'completed': [replay]} local_data = utils.transform_to_local_format(wrapped) diff --git a/BOT/task_executors.py b/BOT/task_executors.py index 8644791..a833de8 100644 --- a/BOT/task_executors.py +++ b/BOT/task_executors.py @@ -292,7 +292,7 @@ async def cleanup_replays(): whenever files inside are added or removed (including by this cleanup), which would otherwise keep dirs perpetually "fresh". """ - KEEP_FILES = {"replay_data.json.gz"} + KEEP_FILES = {"replay_data.json.gz", "RAW_replay_data.json.gz"} def _sync_cleanup_replays(): """Synchronous helper that walks replay dirs and deletes stale files.""" diff --git a/BOT/utils.py b/BOT/utils.py index 87ca6f0..bce354e 100644 --- a/BOT/utils.py +++ b/BOT/utils.py @@ -92,6 +92,19 @@ def replay_session_dir(session_id: str | int) -> Path: def replay_data_path(session_id: str | int) -> Path: return replay_session_dir(session_id) / "replay_data.json.gz" + +# When enabled, the unmodified Spectra payload is stored next to the transformed +# replay (as RAW_replay_data.json.gz) so it can be pulled and re-processed later. +# Toggle off by setting SRE_STORE_RAW_REPLAY=0 in the environment. +STORE_RAW_REPLAY = os.getenv("SRE_STORE_RAW_REPLAY", "1").strip().lower() not in ( + "0", "false", "no", "off", "", +) + + +def raw_replay_data_path(session_id: str | int) -> Path: + """Path to the unmodified Spectra payload for a session (pre-transform).""" + return replay_session_dir(session_id) / "RAW_replay_data.json.gz" + # Dev team Discord user IDs (bot owner + trusted devs) DEV_DISCORD_IDS: set[int] = { 1357793112277127290,