from __future__ import annotations
import asyncio, hashlib, time, re
from typing import Any
import aiohttp
from bs4 import BeautifulSoup
from config import settings
from utils.cache import TTLCache
from utils.rate_limiter import AsyncRateLimiter
from utils.retry import async_retry

class FlashscoreParser:
    """Scrapes flashscore.com competition and match-detail pages.

    HTML is fetched via aiohttp (fast path) or Playwright (for
    JS-rendered pages), throttled by a shared rate limiter and memoized
    in a TTL cache. Parsing is deliberately heuristic: it extracts team
    names and scores from page text rather than relying on stable CSS
    selectors, since flashscore markup changes frequently.
    """

    def __init__(self) -> None:
        # Cache fetched HTML per URL; TTL comes from project settings.
        self.cache = TTLCache(ttl_s=settings.PARSER_CACHE_TTL_S)
        # Enforce a minimum delay between outbound requests.
        self.limiter = AsyncRateLimiter(min_delay_s=settings.PARSER_MIN_DELAY_S)

    async def _fetch_aiohttp(self, url: str) -> str:
        """Fetch *url* with aiohttp and return the body as text.

        Raises:
            aiohttp.ClientResponseError: on a non-2xx HTTP status.
        """
        await self.limiter.wait()
        timeout = aiohttp.ClientTimeout(total=35)
        # NOTE(review): a fresh ClientSession per request forgoes connection
        # pooling; acceptable at the current rate-limited call volume.
        async with aiohttp.ClientSession(timeout=timeout, headers={"User-Agent": "Mozilla/5.0"}) as s:
            async with s.get(url) as r:
                r.raise_for_status()
                return await r.text()

    async def _fetch_playwright(self, url: str) -> str:
        """Fetch *url* with headless Chromium so JS-rendered content loads."""
        await self.limiter.wait()
        # Imported lazily so the module works without Playwright installed
        # when PARSER_USE_PLAYWRIGHT is off.
        from playwright.async_api import async_playwright
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=settings.PARSER_HEADLESS)
            try:
                page = await browser.new_page()
                await page.goto(url, wait_until="domcontentloaded", timeout=45000)
                # Brief grace period for late XHR/JS updates after DOMContentLoaded.
                await asyncio.sleep(1.2)
                return await page.content()
            finally:
                # BUGFIX: close the browser even when goto()/content() raises;
                # previously a navigation timeout leaked one Chromium process
                # per failed fetch.
                await browser.close()

    async def fetch_html(self, url: str) -> str:
        """Return HTML for *url*, consulting the TTL cache before the network."""
        cached = self.cache.get(url)
        # BUGFIX: explicit None check so a cached *empty* page does not look
        # like a cache miss and trigger a refetch on every call. Assumes
        # TTLCache.get returns None on miss — TODO confirm in utils.cache.
        if cached is not None:
            return cached
        if settings.PARSER_USE_PLAYWRIGHT:
            html = await self._fetch_playwright(url)
        else:
            html = await self._fetch_aiohttp(url)
        self.cache.set(url, html)
        return html

    def _normalize_team(self, s: str) -> str:
        """Collapse internal whitespace and cap the team name at 160 chars."""
        return re.sub(r"\s+", " ", s.strip())[:160]

    def _make_id(self, competition_url: str, home: str, away: str, href: str) -> str:
        """Derive a stable 24-hex-char id from the match's identifying fields."""
        base = f"{competition_url}|{home}|{away}|{href}"
        return hashlib.sha1(base.encode("utf-8")).hexdigest()[:24]

    def parse_match_cards(self, competition_url: str, html: str) -> list[dict[str, Any]]:
        """Extract match records from a competition page.

        Heuristic: any anchor whose text contains " - " is treated as a
        "Home - Away" pairing. Returns a de-duplicated list of match dicts
        keyed by a stable id; kickoff/score fields are left None because
        they require deeper per-match parsing.
        """
        soup = BeautifulSoup(html, "lxml")
        matches: list[dict[str, Any]] = []

        for a in soup.find_all("a", href=True):
            t = a.get_text(" ", strip=True)
            if " - " not in t:
                continue
            home, away = t.split(" - ", 1)
            home = self._normalize_team(home)
            away = self._normalize_team(away)
            # ROBUSTNESS: skip anchors like " - Away" or "Home - " whose
            # normalized team name is empty — they are navigation artifacts,
            # not matches.
            if not home or not away:
                continue
            href = a["href"]
            match_url = href
            if href.startswith("/"):
                match_url = "https://www.flashscore.com" + href
            mid = self._make_id(competition_url, home, away, match_url)
            matches.append({
                "id": mid,
                "source": "flashscore",
                "competition_url": competition_url,
                "match_url": match_url,
                "kickoff_ts": None,  # flashscore often requires deeper parsing
                "status": "scheduled",
                "home_team": home,
                "away_team": away,
                "minute": None,
                "home_score": None,
                "away_score": None,
            })

        # De-dup by id; dict insertion order keeps the first page order.
        uniq = {m["id"]: m for m in matches}
        return list(uniq.values())

    def parse_score_from_detail(self, match_url: str, html: str) -> dict[str, Any]:
        """Extract status, minute, and score from a match-detail page.

        Returns a dict with "match_url", "status" ("finished"/"live"/
        "scheduled"), "minute" (int or None), and — when a score pattern is
        found — "home_score"/"away_score".
        """
        soup = BeautifulSoup(html, "lxml")
        text = soup.get_text(" ", strip=True)

        # Score pattern "1:0" or "1 - 0".
        # NOTE(review): this can also match a kickoff time like "19:30" on
        # scheduled matches — needs a more structural selector to be reliable.
        score = None
        m = re.search(r"(\d+)\s*[:\-]\s*(\d+)", text)
        if m:
            score = (int(m.group(1)), int(m.group(2)))

        # Minute pattern like "62'" somewhere in the page text.
        minute = None
        mm = re.search(r"(\d{1,3})\s*'", text)
        if mm:
            minute = int(mm.group(1))

        # BUGFIX: match "FT" only as a whole word. The old substring test
        # ("FT" in text) fired on ordinary words containing "ft" uppercased
        # in page chrome (e.g. "LEFT", "AFTER"), mislabeling live matches
        # as finished.
        if "Finished" in text or "Match finished" in text or re.search(r"\bFT\b", text):
            status = "finished"
        elif minute is not None:
            status = "live"
        else:
            status = "scheduled"

        out = {"match_url": match_url, "status": status, "minute": minute}
        if score is not None:
            out["home_score"], out["away_score"] = score
        return out

    @async_retry(attempts=3)
    async def fetch_competition_matches(self, competition_url: str) -> list[dict[str, Any]]:
        """Fetch a competition page and return its parsed match cards."""
        html = await self.fetch_html(competition_url)
        return self.parse_match_cards(competition_url, html)

    @async_retry(attempts=3)
    async def fetch_match_detail_update(self, match_url: str) -> dict[str, Any]:
        """Fetch a match-detail page and return its parsed score update."""
        html = await self.fetch_html(match_url)
        return self.parse_score_from_detail(match_url, html)
