#!/usr/bin/env python3
"""Monitor Senate Banking Committee markups and alert on new ones."""
import json, os, hashlib, re
from datetime import datetime, timedelta, timezone
import requests

# Directory containing this script; the config and state files live next to it.
DIR = os.path.dirname(os.path.abspath(__file__))
# Read the JSON config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with open(os.path.join(DIR, 'config.json')) as _cfg_file:
    CFG = json.load(_cfg_file)
# Base URL for Telegram Bot API calls, built from the configured bot token.
BOT = f"https://api.telegram.org/bot{CFG['telegram_bot_token']}"
# Fixed UTC+9 offset (presumably Korea Standard Time, going by the name).
KST = timezone(timedelta(hours=9))
# JSON file holding the hashes of markups recorded by earlier runs.
SEEN_FILE = os.path.join(DIR, 'seen_markups.json')
# Page that is scraped for the markup listing.
URL = 'https://www.banking.senate.gov/markups'


def load_seen():
    """Return the markup hashes stored by previous runs.

    Returns:
        The parsed JSON content of ``SEEN_FILE``, or an empty list when the
        file does not exist yet.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    # Open directly instead of checking for existence first: this avoids the
    # gap between check and open, and the ``with`` block guarantees the
    # handle is closed once the content has been parsed.
    try:
        with open(SEEN_FILE) as f:
            return json.load(f)
    except FileNotFoundError:
        return []


def save_seen(seen):
    """Persist the list of seen markup hashes to ``SEEN_FILE``.

    The data is first written to a temporary sibling file and then moved into
    place with ``os.replace``. An interrupted write therefore cannot leave a
    truncated ``SEEN_FILE`` behind, which would make the next ``json.load``
    of the state file fail.

    Args:
        seen: JSON-serializable list of hash strings.
    """
    tmp_path = f"{SEEN_FILE}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(seen, f)
    # Swap the finished file in only after it has been written and closed.
    os.replace(tmp_path, SEEN_FILE)


def fetch_markups(html=None):
    """Scrape markup list from Senate Banking Committee.

    Args:
        html: Optional page source to parse. When omitted (the default) the
            page at ``URL`` is downloaded. Passing it allows a saved copy of
            the page to be parsed without network access.

    Returns:
        A list of dicts, one per ``<tr class="vevent">`` row that contains a
        link to a ``/hearings/...`` path. Each dict has the keys ``url``,
        ``date``, ``title``, ``postponed`` and ``hash`` (MD5 hex digest of
        the link path, used as the identifier of the entry).

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    if html is None:
        r = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=15)
        r.raise_for_status()
        html = r.text

    markups = []
    # The first chunk is everything before the first vevent row; every later
    # chunk starts right after a row's opening tag.
    for part in re.split(r'<tr\s+class="vevent">', html)[1:]:
        # Confine the search to this row. str.find reports a missing closing
        # tag as -1 (0 is a valid position), so only -1 falls back to the
        # whole chunk; otherwise links from later rows could be picked up.
        end = part.find('</tr>')
        block = part[:end] if end != -1 else part

        link = re.search(r'href="(/hearings/[^"]+)"', block)
        if not link:
            continue
        href = link.group(1)

        # Strip tags and collapse whitespace to get the row's visible text.
        text = re.sub(r'<[^>]+>', ' ', block)
        text = re.sub(r'\s+', ' ', text).strip()

        # Date in NN/NN/NN form, optionally followed by a time like 10:00AM.
        date_match = re.search(r'(\d{2}/\d{2}/\d{2}(?:\s+\d{1,2}:\d{2}[AP]M)?)', text)
        date_str = date_match.group(1).strip() if date_match else ''

        # Title is the text after "Markup:" or "Executive Session:" up to the
        # date; without such a label the first 100 characters are used.
        title_match = re.search(r'(?:Markup|Executive Session):\s*(.+?)(?:\d{2}/\d{2}/|$)', text)
        title = title_match.group(1).strip() if title_match else text[:100]

        markups.append({
            'url': f"https://www.banking.senate.gov{href}",
            'date': date_str,
            'title': title,
            'postponed': 'POSTPONED' in text,
            # The digest is only an identifier for the link path.
            'hash': hashlib.md5(href.encode()).hexdigest(),
        })

    return markups


def send_msg(text):
    """Post ``text`` to the configured Telegram chat via ``sendMessage``.

    The message is sent with HTML parse mode and link previews disabled.

    Args:
        text: Message body.

    Returns:
        The decoded JSON body of the Telegram response, or ``None`` when the
        config has no ``chat_id``.
    """
    target_chat = CFG.get('chat_id')
    if target_chat:
        payload = {
            'chat_id': target_chat,
            'text': text,
            'parse_mode': 'HTML',
            'disable_web_page_preview': True,
        }
        endpoint = f"{BOT}/sendMessage"
        response = requests.post(endpoint, json=payload, timeout=15)
        return response.json()
    # Nothing to send to: no chat configured.
    return None


def main():
    """Check the committee page and alert on markups not seen before.

    On the first run (no stored hashes) the current listing is saved as the
    baseline and no alerts are sent. On later runs every markup whose hash is
    not stored yet is announced through ``send_msg``.

    A hash is recorded only after its alert was delivered, so a failed send
    is retried on the next run instead of being lost. Hashes stored by
    earlier runs are kept, which means the stored list never becomes empty
    (an empty list would be mistaken for a first run) and an entry that
    temporarily drops off the page is not announced a second time.
    """
    print(f"[{datetime.now()}] Checking Senate Banking markups...")

    markups = fetch_markups()
    print(f"  Found {len(markups)} markups on page")

    seen = load_seen()

    # First run: save current list without sending alerts
    if not seen:
        save_seen([m['hash'] for m in markups])
        print(f"  First run - saved {len(markups)} existing markups. No alerts sent.")
        return

    # Set for constant-time membership checks.
    seen_hashes = set(seen)
    new_ones = [m for m in markups if m['hash'] not in seen_hashes]

    if not new_ones:
        print("  No new markups.")
        return

    print(f"  {len(new_ones)} new markup(s) found!")

    # Start from what is already stored and add hashes as alerts go out.
    recorded = list(seen)
    for m in new_ones:
        status = " [POSTPONED]" if m['postponed'] else ""
        msg = (
            f"<b>Senate Banking Committee - 새 Markup 등록{status}</b>\n\n"
            f"일정: {m['date']}\n"
            f"제목: {m['title']}\n\n"
            f"{m['url']}"
        )
        # A network error or an undecodable response for one message must not
        # abort the loop: alerts already sent still have to be recorded below,
        # otherwise they would be sent again on the next run.
        try:
            result = send_msg(msg)
        except (requests.RequestException, ValueError) as exc:
            result = {'ok': False, 'description': str(exc)}

        if result and result.get('ok'):
            recorded.append(m['hash'])
            print(f"  Sent: {m['title']}")
        else:
            print(f"  Failed: {result}")

    # Persist previously stored hashes plus those whose alert was delivered.
    save_seen(recorded)
    print("  Done!")


# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
