#!/usr/bin/env python3
import os, json, hashlib, logging, asyncio, re, aiohttp
from datetime import datetime
from pathlib import Path
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup

# Notification settings read from the environment. An empty TELEGRAM_TOKEN
# disables Telegram delivery; VAPID_EMAIL is used as the "mailto:" subject
# of the VAPID claim for web push.
TELEGRAM_TOKEN   = os.environ.get("NAVMON_TG_TOKEN", "")
TELEGRAM_CHAT_ID = os.environ.get("NAVMON_TG_CHAT_ID", "")
VAPID_EMAIL      = os.environ.get("NAVMON_VAPID_EMAIL", "navmon@nautiq-lodan.com")

# All state files live next to this script.
BASE_DIR     = Path(__file__).parent
CACHE_FILE   = BASE_DIR / "cache.json"          # per-source item keys seen on the last check
HISTORY_FILE = BASE_DIR / "history.json"        # list of discovered items (trimmed to the last 1000 on save)
SUBS_FILE    = BASE_DIR / "subscriptions.json"  # web-push subscriptions
VAPID_FILE   = BASE_DIR / "vapid_keys.json"     # expected to hold "private_key_pem"
LOG_FILE     = BASE_DIR / "navmon.log"          # target of the logging FileHandler

# Monitored pages. Keys per entry:
#   id       - key under which the source is stored in the cache / history
#   name     - display name used in notification titles
#   country  - copied into every history record
#   url      - page or feed that is fetched
#   parser   - key into PARSERS selecting the parsing function
#   base_url - prefix used to turn relative links into absolute ones
#   selector - (optional) CSS selector used by the "generic_links" parser
#   emoji    - prefix shown in notifications
SOURCES = [
    {"id":"afdj_avize","name":"AFDJ Avízy 🇷🇴","country":"Rumunsko","url":"https://www.afdj.ro/ro/content/avize-catre-navigatori","parser":"afdj_avize","base_url":"https://www.afdj.ro","emoji":"📋"},
    {"id":"afdj_cotele","name":"AFDJ Hladiny 🇷🇴","country":"Rumunsko","url":"https://www.afdj.ro/ro/cotele-dunarii","parser":"afdj_cotele","base_url":"https://www.afdj.ro","emoji":"📉"},
    {"id":"appd_izvestia","name":"APPD Notices 🇧🇬","country":"Bulharsko","url":"https://www.appd-bg.org/navigationroad","parser":"appd_izvestia","base_url":"https://www.appd-bg.org","emoji":"📋"},
    {"id":"appd_news","name":"APPD News 🇧🇬","country":"Bulharsko","url":"https://www.appd-bg.org/news","parser":"generic_links","base_url":"https://www.appd-bg.org","selector":"a[href*='newsinfo'], h3 a","emoji":"📢"},
    {"id":"plovput_rss","name":"Plovput News 🇷🇸","country":"Srbsko","url":"https://www.plovput.rs/rss/index.php","parser":"rss","base_url":"https://www.plovput.rs","emoji":"📢"},
    {"id":"plovput_gabariti","name":"Plovput Surveys 🇷🇸","country":"Srbsko","url":"https://www.plovput.rs/izvestaj-o-merenju-gabarita","parser":"plovput_gabariti","base_url":"https://www.plovput.rs","emoji":"📏"},
]

# Station names matched (case-insensitively, as substrings) against the <h2>
# headings of the water-level page; other stations are ignored.
WATCH_STATIONS = {"Bazias","Moldova Veche","Drobeta Turnu Severin","Gruia","Calafat"}
# A station is reported only when its parsed variation is below -WATER_LEVEL_DROP_CM.
WATER_LEVEL_DROP_CM = 20

# Log at INFO level to both LOG_FILE and the console; `log` is the logger
# used throughout this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
log = logging.getLogger("navmon")

def load_json(path, default):
    """Read and decode a UTF-8 JSON file, falling back to *default*.

    Args:
        path: ``pathlib.Path`` of the file to read.
        default: Value returned when the file is missing, unreadable or
            does not contain valid JSON.

    Returns:
        The decoded JSON document, or *default*.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # A missing file is the normal state before the first save.
        return default
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        # Report the problem instead of hiding it: a corrupt file otherwise
        # looks exactly like "no data yet" to the caller.
        logging.getLogger("navmon").warning(f"  Nepodarilo sa nacitat {path}: {e}")
        return default

def save_json(path, data):
    """Serialize *data* as pretty-printed UTF-8 JSON and store it at *path*.

    The document is written to a temporary sibling file first and then moved
    over *path* with ``os.replace``, so an interrupted write cannot leave a
    truncated JSON file behind.

    Args:
        path: ``pathlib.Path`` of the destination file.
        data: JSON-serializable object.

    Raises:
        TypeError: If *data* is not JSON-serializable.
        OSError: If the file cannot be written or replaced.
    """
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    # Same directory as the target so the final rename stays on one filesystem;
    # the PID keeps concurrent writers from sharing a temp file.
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, path)
    finally:
        # Only still present when the write or the replace failed.
        if tmp_path.exists():
            tmp_path.unlink()

def item_key(item):
    """Return a stable fingerprint for a scraped item.

    The fingerprint is the MD5 hex digest of the item's stripped text
    (first 120 characters) joined with its URL by ``|``. It is used only
    to recognise items that were already seen.

    Args:
        item: Mapping with optional ``"text"`` and ``"url"`` string entries.

    Returns:
        32-character hexadecimal digest string.
    """
    title = item.get("text", "").strip()[:120]
    link = item.get("url", "")
    fingerprint = f"{title}|{link}"
    return hashlib.md5(fingerprint.encode()).hexdigest()

# Browser User-Agent strings; get_headers() cycles through them by attempt number.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Version/17.4 Safari/605.1.15",
]


def get_headers(attempt=0):
    """Build the HTTP request headers for the given retry attempt.

    Args:
        attempt: Zero-based attempt number; selects the User-Agent by
            wrapping around ``USER_AGENTS``.

    Returns:
        Dict with ``User-Agent``, ``Accept`` and ``Accept-Language`` headers.
    """
    agent_index = attempt % len(USER_AGENTS)
    return {
        "User-Agent": USER_AGENTS[agent_index],
        "Accept": "text/html,*/*;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
    }

async def fetch(session, url):
    """Download *url* as text, retrying up to three times.

    A 403 response or a raised exception triggers a retry with the next
    User-Agent; any other status >= 400 gives up immediately.

    Args:
        session: ``aiohttp.ClientSession`` used for the request.
        url: Address to fetch.

    Returns:
        The response body decoded with ``errors="replace"``, or ``None``
        when the request ultimately failed.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            async with session.get(url, headers=get_headers(attempt), timeout=aiohttp.ClientTimeout(total=30), allow_redirects=True) as r:
                if r.status == 403 and not is_last:
                    # Possibly blocked by User-Agent: pause, then retry with the next one.
                    await asyncio.sleep(3)
                    continue
                if r.status >= 400:
                    log.warning(f"  HTTP {r.status} {url}")
                    return None
                return await r.text(errors="replace")
        except Exception as e:
            # Best-effort boundary: any network/decoding failure counts as a failed attempt.
            if is_last:
                log.warning(f"  Chyba {url}: {e}")
                # No further attempt follows, so do not waste time sleeping.
                return None
            await asyncio.sleep(2)
    return None

def parse_afdj_avize(html, source):
    """Extract notice links from the AFDJ notices table.

    Every ``<a href>`` inside a table cell whose href contains "aviz"
    (case-insensitive) becomes one item. When the cell is followed by a
    sibling cell, that cell's text is appended in parentheses.

    Args:
        html: Page markup.
        source: Source definition; ``source["base_url"]`` prefixes relative links.

    Returns:
        List of ``{"text": ..., "url": ...}`` dicts in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    items = []
    for a in soup.select("table td a[href]"):
        href = a.get("href", "")
        # "aviz" is a substring of "avize", so a single test covers both spellings.
        if "aviz" not in href.lower():
            continue
        text = a.get_text(strip=True)
        if not text:
            continue
        date_text = ""
        td = a.find_parent("td")
        if td:
            next_td = td.find_next_sibling("td")
            if next_td:
                date_text = next_td.get_text(strip=True)
        label = f"{text} ({date_text})" if date_text else text
        if href.startswith("http"):
            full_url = href
        else:
            # Insert the separating slash when the relative href lacks one;
            # otherwise the path would be glued directly onto the host name.
            full_url = source["base_url"] + ("" if href.startswith("/") else "/") + href
        items.append({"text": label, "url": full_url})
    return items

def parse_afdj_cotele(html, source):
    """Report watched stations whose water level dropped sharply.

    The page is read as a sequence of ``<h2>`` station headings, each followed
    by sibling nodes up to the next ``<h2>``. From that text the "Cota ... cm"
    value and the "Variația" value are parsed; a station is reported when its
    name contains one of ``WATCH_STATIONS`` and the variation is below
    ``-WATER_LEVEL_DROP_CM``.

    Args:
        html: Page markup.
        source: Source definition; ``source["base_url"]`` builds the item URL.

    Returns:
        List of ``{"text": ..., "url": ...}`` alert dicts.
    """
    soup = BeautifulSoup(html, "html.parser")
    alerts = []
    for heading in soup.find_all("h2"):
        station = heading.get_text(strip=True)

        # Gather the text of everything between this heading and the next one.
        chunks = []
        for node in heading.next_siblings:
            if node.name == "h2":
                break
            if hasattr(node, "get_text"):
                chunks.append(node.get_text(" ", strip=True))
        body = " ".join(chunks)

        level_match = re.search(r"Cota[\s:]+(-?\d+)\s*cm", body, re.I)
        change_match = re.search(r"Varia[tț]ia[\s:]+(-?\d+)", body, re.I)
        if level_match is None:
            continue
        level = int(level_match.group(1))
        # A missing variation is treated as "no change".
        change = int(change_match.group(1)) if change_match else 0

        station_lower = station.lower()
        is_watched = any(name.lower() in station_lower for name in WATCH_STATIONS)
        if not is_watched or change >= -WATER_LEVEL_DROP_CM:
            continue

        alerts.append({
            "text": f"{station}: {level} cm (pokles {change} cm/deň) ⚠️",
            "url": source["base_url"] + "/ro/cotele-dunarii",
        })
    return alerts

def parse_appd_izvestia(html, source):
    """Extract navigation notices from the APPD page.

    Two kinds of entries are collected:
      * links whose href contains ``navigationroad?id=`` and whose text
        contains a digit, de-duplicated by absolute URL;
      * ``<h5>`` headings without a link whose text contains a digit; these
        point at the ``/navigationroad`` page itself and are placed at the
        front of the result. Because they all share that one URL, only the
        first such heading is kept.

    Args:
        html: Page markup.
        source: Source definition; ``source["base_url"]`` prefixes relative links.

    Returns:
        At most 30 ``{"text": ..., "url": ...}`` dicts.
    """
    soup = BeautifulSoup(html, "html.parser")
    notices = []
    known_urls = set()

    for link in soup.select("a[href*='navigationroad?id=']"):
        label = link.get_text(strip=True)
        href = link.get("href", "")
        if not href:
            continue
        if re.search(r"\d+", label) is None:
            continue
        if href.startswith("http"):
            absolute = href
        else:
            separator = "" if href.startswith("/") else "/"
            absolute = source["base_url"] + separator + href
        if absolute not in known_urls:
            known_urls.add(absolute)
            notices.append({"text": label, "url": absolute})

    for heading in soup.select("h5"):
        if heading.find("a"):
            continue
        label = heading.get_text(strip=True)
        if re.search(r"\d+", label) is None:
            continue
        page_url = source["base_url"] + "/navigationroad"
        if page_url not in known_urls:
            known_urls.add(page_url)
            notices.insert(0, {"text": label + " (aktualeno)", "url": page_url})

    return notices[:30]

def parse_rss(xml_text, source):
    """Extract items from an RSS feed.

    Each ``<item>`` with both a title and a link becomes one entry; the first
    80 characters of the description, when present, are appended to the title.

    Args:
        xml_text: Feed content as already-decoded text.
        source: Source definition (unused; kept for the common parser signature).

    Returns:
        List of ``{"text": ..., "url": ...}`` dicts (text capped at 200
        characters); empty when the XML cannot be parsed.
    """
    items = []
    # The text is already decoded, so it must be parsed as a str: handing the
    # parser UTF-8 bytes would make it apply the document's own encoding
    # declaration (e.g. iso-8859-2) and garble every non-ASCII character.
    # The encode/decode round trip only replaces characters that cannot be
    # represented in UTF-8 (lone surrogates).
    clean_text = xml_text.encode("utf-8", errors="replace").decode("utf-8")
    try:
        root = ET.fromstring(clean_text)
    except ET.ParseError as e:
        log.warning(f"  RSS parse error: {e}")
        return items
    for item in root.iter("item"):
        title = (item.findtext("title") or "").strip()
        link = (item.findtext("link") or "").strip()
        desc = (item.findtext("description") or "").strip()
        if not title or not link:
            continue
        text = f"{title} — {desc[:80]}" if desc else title
        items.append({"text": text[:200], "url": link})
    return items

def parse_plovput_gabariti(html, source):
    """Extract survey-report PDF links from the Plovput page.

    Links whose href contains ``izvestaj-o-merenju-gabarita`` and ends in
    ``.pdf`` are collected, de-duplicated by absolute URL. A link without
    text is labelled from its file name, and a four-digit path segment
    (``/YYYY/``) is appended to the label when not already part of it.

    Args:
        html: Page markup.
        source: Source definition; ``source["base_url"]`` prefixes relative links.

    Returns:
        At most 30 ``{"text": ..., "url": ...}`` dicts (text capped at 200 characters).
    """
    soup = BeautifulSoup(html, "html.parser")
    items = []
    seen = set()
    for a in soup.select("a[href*='izvestaj-o-merenju-gabarita'][href$='.pdf']"):
        href = a.get("href", "")
        text = a.get_text(strip=True) or href.split("/")[-1].replace("-", " ").replace(".pdf", "")
        if not text:
            continue
        if href.startswith("http"):
            full_url = href
        else:
            # Insert the separating slash when the relative href lacks one;
            # otherwise the path would be glued directly onto the host name.
            full_url = source["base_url"] + ("" if href.startswith("/") else "/") + href
        if full_url in seen:
            continue
        seen.add(full_url)
        yr_m = re.search(r"/(\d{4})/", href)
        if yr_m and yr_m.group(1) not in text:
            text = f"{text} ({yr_m.group(1)})"
        items.append({"text": text[:200], "url": full_url})
    return items[:30]

def parse_generic_links(html, source):
    """Extract links matched by a CSS selector.

    Uses ``source["selector"]`` (default ``"table td a"``). Links with text
    shorter than five characters, empty/placeholder hrefs, non-navigable
    pseudo-links and known navigation labels are skipped; the rest are
    de-duplicated by (text, URL).

    Args:
        html: Page markup.
        source: Source definition; ``source["base_url"]`` prefixes relative links.

    Returns:
        At most 40 ``{"text": ..., "url": ...}`` dicts (text capped at 200 characters).
    """
    soup = BeautifulSoup(html, "html.parser")
    items, seen = [], set()
    NAV_SKIP = {"english","sitemap","contact","home","back","next","prev","известия"}
    # Schemes that never lead to a page worth reporting.
    PSEUDO_SCHEMES = ("javascript:", "mailto:", "tel:")
    for a in soup.select(source.get("selector", "table td a")):
        text = a.get_text(strip=True)
        href = a.get("href", "")
        if len(text) < 5:  # also covers empty text
            continue
        if not href or href in ("#", "/"):
            continue
        if href.lower().startswith(PSEUDO_SCHEMES):
            continue
        if text.lower() in NAV_SKIP:
            continue
        if href.startswith("http"):
            full_url = href
        else:
            # Insert the separating slash when the relative href lacks one;
            # otherwise the path would be glued directly onto the host name.
            full_url = source["base_url"] + ("" if href.startswith("/") else "/") + href
        key = f"{text[:80]}|{full_url}"
        if key in seen:
            continue
        seen.add(key)
        items.append({"text": text[:200], "url": full_url})
    return items[:40]

# Maps the "parser" value of a SOURCES entry to the function that turns the
# fetched text into a list of {"text", "url"} items.
PARSERS = {
    "afdj_avize": parse_afdj_avize,
    "afdj_cotele": parse_afdj_cotele,
    "appd_izvestia": parse_appd_izvestia,
    "rss": parse_rss,
    "plovput_gabariti": parse_plovput_gabariti,
    "generic_links": parse_generic_links,
}

async def send_telegram(session, source, new_items):
    """Send a Telegram message summarising the new items of one source.

    Does nothing when ``TELEGRAM_TOKEN`` is empty. The message lists at most
    five items as links and is sent with ``parse_mode="HTML"``. Failures are
    logged, never raised.

    Args:
        session: ``aiohttp.ClientSession`` used for the API call.
        source: Source definition (``emoji`` and ``name`` are used).
        new_items: List of ``{"text": ..., "url": ...}`` dicts.
    """
    # Imported locally under its own name because `html` is used as a
    # variable name for page text elsewhere in this module.
    from html import escape

    if not TELEGRAM_TOKEN:
        return
    count = len(new_items)
    # Scraped text and URLs are untrusted: escape them so that "<", "&" or
    # quotes cannot break the HTML markup of the message.
    lines = [f"{escape(source['emoji'])} <b>{escape(source['name'])}</b> — {count} nov{'é' if count==1 else 'ých'}\n"]
    for item in new_items[:5]:
        href = escape(item["url"], quote=True)
        # Truncate before escaping so an entity is never cut in half.
        label = escape(item["text"][:100])
        lines.append(f"• <a href='{href}'>{label}</a>")
    if count > 5:
        lines.append(f"  … a ďalších {count-5}")
    lines.append(f"\n🕐 {datetime.now().strftime('%d.%m.%Y %H:%M')}")
    try:
        async with session.post(f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage",
            json={"chat_id": TELEGRAM_CHAT_ID, "text": "\n".join(lines), "parse_mode": "HTML", "disable_web_page_preview": True},
            timeout=aiohttp.ClientTimeout(total=15)) as r:
            if r.status != 200:
                log.warning(f"Telegram HTTP {r.status}")
            else:
                log.info("  Telegram OK")
    except Exception as e:
        # Notification is best-effort; a failure must not abort the run.
        log.warning(f"Telegram chyba: {e}")

def send_web_push_all(source, new_items):
    """Send a web-push notification about new items to every subscriber.

    Silently does nothing when ``pywebpush`` is not installed, when the VAPID
    key file or its ``private_key_pem`` entry is missing, or when there are no
    subscriptions. Subscriptions rejected with HTTP 404/410 are removed from
    ``SUBS_FILE``; other delivery errors are logged.

    Args:
        source: Source definition (``emoji``, ``name`` and ``id`` are used).
        new_items: List of ``{"text": ..., "url": ...}`` dicts.
    """
    try:
        from pywebpush import webpush
    except ImportError:
        return
    if not VAPID_FILE.exists():
        return
    keys = load_json(VAPID_FILE, {})
    vapid_priv = keys.get("private_key_pem", "")
    if not vapid_priv:
        return
    subs = load_json(SUBS_FILE, [])
    if not subs:
        return
    count = len(new_items)
    payload = json.dumps({
        "title": f"{source['emoji']} {source['name']} — {count} nov{'é' if count==1 else 'ých'}",
        "body": new_items[0]["text"][:100] if count==1 else "\n".join(i["text"][:60] for i in new_items[:2]),
        "source_id": source["id"],
        "items": [{"text": i["text"][:80], "url": i["url"]} for i in new_items[:3]],
    }, ensure_ascii=False)
    dead = []
    failed = 0
    for sub in subs:
        try:
            webpush(subscription_info=sub, data=payload, vapid_private_key=vapid_priv, vapid_claims={"sub": f"mailto:{VAPID_EMAIL}"})
        except Exception as e:
            resp = getattr(e, "response", None)
            # Read the status explicitly: a requests.Response is falsy for any
            # 4xx/5xx status, so a truthiness test on it never detects 404/410.
            status = getattr(resp, "status_code", None)
            if status in (404, 410):
                # The push service no longer knows this subscription.
                dead.append(sub)
            else:
                failed += 1
                log.warning(f"  WebPush chyba: {e}")
    if dead:
        save_json(SUBS_FILE, [s for s in subs if s not in dead])
    log.info(f"  WebPush OK ({len(subs)-len(dead)-failed} odberatelov)")

async def check_source(session, source, cache):
    """Fetch and parse one source and return the items not seen last time.

    On a successful, non-empty parse, ``cache[source["id"]]`` is replaced
    with the current item keys and check metadata. A failed fetch, unknown
    parser, parser error or empty result leaves the cache untouched.

    Args:
        session: ``aiohttp.ClientSession`` used for the download.
        source: Source definition (``id``, ``url``, ``parser`` are used).
        cache: Mutable dict of per-source state; updated in place.

    Returns:
        List of new ``{"text": ..., "url": ...}`` items, in page order and
        without duplicates.
    """
    log.info(f"  [{source['id']}] {source['url']}")
    html = await fetch(session, source["url"])
    if not html:
        return []
    parser_fn = PARSERS.get(source["parser"])
    if not parser_fn:
        log.warning(f"  [{source['id']}] neznamy parser: {source['parser']}")
        return []
    try:
        items = parser_fn(html, source)
    except Exception as e:
        # A broken parser must not stop the remaining sources from being checked.
        log.error(f"  Parser chyba: {e}")
        return []
    if not items:
        return []
    prev_keys = set(cache.get(source["id"], {}).get("keys", []))
    # One pass: hash each item once, skip repeats within this scrape and
    # keep the ones whose key was not present on the previous check.
    curr_keys = set()
    new_items = []
    for item in items:
        key = item_key(item)
        if key in curr_keys:
            continue
        curr_keys.add(key)
        if key not in prev_keys:
            new_items.append(item)
    cache[source["id"]] = {"keys": list(curr_keys), "last_check": datetime.now().isoformat(), "count": len(items), "url": source["url"]}
    log.info(f"  [{source['id']}] {len(items)} celkom, {len(new_items)} novych")
    return new_items

async def run():
    """Check every configured source once, notify, and persist state.

    New items are appended to the history. Notifications (Telegram and web
    push) are sent only for sources that already had a cache entry before
    this check, so the first successful check of a source merely records
    its baseline. The cache and the last 1000 history records are saved
    at the end.
    """
    cache   = load_json(CACHE_FILE, {})
    history = load_json(HISTORY_FILE, [])
    first_run = not cache
    if first_run:
        log.info("Prve spustenie - kesuje bez notifikacii")
    # NOTE(review): ssl=False turns off TLS certificate verification for every
    # request on this session, including the Telegram call that carries the
    # bot token. Presumably needed for one of the scraped sites - confirm, and
    # consider a separate verified session for Telegram.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        for source in SOURCES:
            # Decide before the check: check_source() adds the cache entry.
            # A source without an entry (first run, newly added, or never
            # fetched successfully) would otherwise report its whole page.
            is_baseline = source["id"] not in cache
            new_items = await check_source(session, source, cache)
            if new_items:
                ts = datetime.now().isoformat()
                for item in new_items:
                    history.append({**item, "source_id": source["id"], "source_name": source["name"], "country": source["country"], "emoji": source["emoji"], "found_at": ts})
                if is_baseline:
                    if not first_run:
                        log.info(f"  [{source['id']}] prvy zaznam zdroja - bez notifikacii")
                else:
                    await send_telegram(session, source, new_items)
                    send_web_push_all(source, new_items)
            # Pause between sources to keep the request rate low.
            await asyncio.sleep(2)
    # Trim first so the logged size matches what is actually stored.
    history = history[-1000:]
    save_json(CACHE_FILE, cache)
    save_json(HISTORY_FILE, history)
    log.info(f"Hotovo. Historia: {len(history)} zaznamov\n")

# Script entry point: log a start banner and execute a single monitoring pass.
if __name__ == "__main__":
    log.info("=== NavMon start ===")
    asyncio.run(run())