diff --git a/BOT/tss_tournaments.py b/BOT/tss_tournaments.py index bf675bb..e5cebb3 100644 --- a/BOT/tss_tournaments.py +++ b/BOT/tss_tournaments.py @@ -17,15 +17,17 @@ the captured sample payloads. from __future__ import annotations import asyncio +from concurrent.futures import ThreadPoolExecutor, as_completed import json import logging +import os import sqlite3 import threading import time import urllib.parse import urllib.request from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple from BOT.storage import STORAGE_DIR @@ -36,6 +38,7 @@ TSS_TOURNAMENTS_DB_PATH: Path = STORAGE_DIR / "tss_tournaments.db" _API_URL = "https://tss.warthunder.com/functions.php" _API_HEADERS = {"Content-Type": "application/x-www-form-urlencoded"} _API_TIMEOUT = 20 +_DEFAULT_BATTLE_WORKERS = max(1, int(os.environ.get("TSS_TOURNAMENT_BATTLE_WORKERS", "8"))) # Re-scan a live tournament at most this often; stop re-scanning this long after # its end time (battle rows land shortly after a game finishes). @@ -521,6 +524,8 @@ def build_scan_sync( fallback_name: Optional[str] = None, active_meta: Optional[Dict[str, Any]] = None, now: Optional[int] = None, + battle_workers: int = _DEFAULT_BATTLE_WORKERS, + progress: Optional[Callable[[str], None]] = None, ) -> Dict[str, Any]: """Fetch + assemble the full authoritative structure for one tournament. @@ -563,19 +568,23 @@ def build_scan_sync( # Battles per match → session links. Dedupe match_ids (same id can repeat # across sources); fetch once per (match_id, type_bracket). 
def fetch_battles_for_matches(
    tournament_id: int,
    matches: List[Dict[str, Any]],
    *,
    workers: int = _DEFAULT_BATTLE_WORKERS,
    progress: Optional[Callable[[str], None]] = None,
) -> List[Dict[str, Any]]:
    """Fetch getListAllBattles rows for each match concurrently.

    One ``getListAllBattles`` request is issued per entry of *matches* on a
    thread pool. Only the request and ``parse_battles`` run on worker
    threads; every mutation of a match dict happens on the calling thread.

    Args:
        tournament_id: Tournament id passed to the API as ``tournamentID``.
        matches: Match dicts providing ``match_id``, ``type_bracket`` and
            ``status``. Each dict is updated in place: names are filled via
            ``fill_names_from_battles`` and ``status`` becomes
            ``"technical"`` when ``parse_battles`` reports a technical
            result for a match that is still ``"pending"`` or ``"bye"``.
        workers: Upper bound on concurrent requests; values below 1 are
            treated as 1.
        progress: Optional callback that receives a short status string
            every 25 completed lookups and once more when all are done.

    Returns:
        The parsed battles of all matches, grouped in the order of
        *matches* regardless of which request finished first.

    Raises:
        Whatever ``_request`` or ``parse_battles`` raises. Lookups that have
        not started yet are cancelled before the error propagates.
    """
    if not matches:
        return []

    def one(match: Dict[str, Any]) -> Tuple[Any, List[Dict[str, Any]], bool]:
        # Worker-thread part: network call plus parsing, no shared-state writes.
        mid, tb = match["match_id"], match["type_bracket"]
        rows = _request(
            "GET",
            "getListAllBattles",
            tournamentID=tournament_id,
            idMatch=mid,
            typeBracket=tb,
        )
        match_battles, technical = parse_battles(rows, tournament_id, mid, tb)
        return rows, match_battles, technical

    total = len(matches)
    done = 0
    # One slot per input match so the output order is independent of the
    # order in which the requests happen to complete.
    per_match: List[List[Dict[str, Any]]] = [[] for _ in matches]

    with ThreadPoolExecutor(max_workers=max(1, workers)) as pool:
        index_of = {pool.submit(one, match): i for i, match in enumerate(matches)}
        try:
            for future in as_completed(index_of):
                position = index_of[future]
                match = matches[position]
                rows, match_battles, technical = future.result()
                fill_names_from_battles(match, rows)
                if technical and match["status"] in ("pending", "bye"):
                    match["status"] = "technical"
                per_match[position] = match_battles
                done += 1
                if progress and (done == total or done % 25 == 0):
                    progress(f"battle lookups {done}/{total}")
        except BaseException:
            # Leaving the ``with`` block waits for the pool; drop queued
            # lookups so a failure does not trigger the remaining requests.
            for pending in index_of:
                pending.cancel()
            raise

    battles: List[Dict[str, Any]] = []
    for match_battles in per_match:
        battles.extend(match_battles)
    return battles
def test_fetch_battles_for_matches_concurrent(monkeypatch):
    """A single match triggers one API call; battles and team names come back filled."""
    recorded = []

    def fake_request(method, action, **params):
        # Record the call, then answer with one finished battle row.
        recorded.append((method, action, params["idMatch"]))
        team_a = {"teamName": "uuid-a", "realName": "NUGOB"}
        team_b = {"teamName": "uuid-b", "realName": "GRIDAC"}
        row = {
            "url": "224584316650954636",
            "position": 0,
            "statusReplay": "view replay",
            "winner": "NUGOB",
            "teamA": team_a,
            "teamB": team_b,
        }
        return [row]

    monkeypatch.setattr(tt, "_request", fake_request)

    only_match = {
        "match_id": "m1",
        "type_bracket": "Winner",
        "status": "played",
        "team_a_uuid": "uuid-a",
        "team_a_name": None,
        "team_b_uuid": "uuid-b",
        "team_b_name": None,
        "winner_name": None,
        "score_a": 1,
        "score_b": 0,
    }
    matches = [only_match]

    battles = tt.fetch_battles_for_matches(123, matches, workers=2)

    assert len(recorded) == 1
    first_battle = battles[0]
    assert first_battle["session_hex"] == "31de23f001a9f8c"
    filled = matches[0]
    assert filled["team_a_name"] == "NUGOB"
    assert filled["team_b_name"] == "GRIDAC"