#!/usr/bin/env python3
"""
Newsletter Digest — Harvey
Fetches unread newsletters from Gmail, summarizes with Claude, generates HTML digest.
Usage:
  python3 newsletter-digest.py           # generate digest + notify via Telegram
  python3 newsletter-digest.py --quiet   # generate digest only, no Telegram
  python3 newsletter-digest.py --archive 1 3 5   # archive newsletters by number
  python3 newsletter-digest.py --archive all     # archive all from last digest
"""

import imaplib
import email
from email.header import decode_header
import json
import os
import sys
import re
import html
import urllib.request
import urllib.parse
from datetime import datetime
from pathlib import Path

# Config
# All files this script reads or writes live under ~/.openclaw.
_OPENCLAW_HOME = Path.home() / ".openclaw"

# Gmail account used for the IMAP login.
GMAIL_USER = "mikeziarko@gmail.com"

# Directory of secret files (read by get_secret) and the working directory.
SECRETS_DIR = _OPENCLAW_HOME / "secrets"
WORKSPACE = _OPENCLAW_HOME / "workspace"

# Rendered digest page, and the JSON state remembered between runs.
OUTPUT_HTML = WORKSPACE / "newsletter-digest.html"
STATE_FILE = WORKSPACE / "memory" / "newsletter-state.json"

# Base URL the digest page is linked under in the Telegram notification.
PREVIEW_BASE = "http://100.68.175.57:8765"

# Telegram chat that receives the notification.
TELEGRAM_CHAT_ID = "8792051045"

def get_secret(name):
    """Return the contents of the file *name* inside SECRETS_DIR, stripped of
    leading and trailing whitespace."""
    secret_path = SECRETS_DIR.joinpath(name)
    contents = secret_path.read_text()
    return contents.strip()

def decode_str(s):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        s: raw header value (may be None or empty).

    Returns:
        The decoded text. Empty/None input yields "". Undecodable bytes are
        replaced rather than raising, an unknown charset label falls back to
        UTF-8, and a header whose encoded-word cannot be parsed at all is
        returned as-is so one malformed message cannot abort the caller.
    """
    # Local import keeps this block self-contained.
    from email.errors import HeaderParseError

    if not s:
        return ""

    try:
        parts = decode_header(s)
    except HeaderParseError:
        # e.g. corrupt base64 inside =?charset?b?...?= -- show the raw value.
        return str(s)

    pieces = []
    for part, charset in parts:
        if not isinstance(part, bytes):
            pieces.append(part)
            continue
        try:
            pieces.append(part.decode(charset or "utf-8", errors="replace"))
        except LookupError:
            # The header names a codec Python does not know.
            pieces.append(part.decode("utf-8", errors="replace"))
    return "".join(pieces)

def _decode_part(part):
    """Return the decoded text payload of one MIME part ("" if it has none)."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # Charset label is not a codec Python knows; UTF-8 is the best guess.
        return payload.decode("utf-8", errors="replace")


def _html_to_text(markup):
    """Roughly reduce HTML to whitespace-normalised plain text."""
    flags = re.DOTALL | re.IGNORECASE
    markup = re.sub(r'<style[^>]*>.*?</style>', ' ', markup, flags=flags)
    markup = re.sub(r'<script[^>]*>.*?</script>', ' ', markup, flags=flags)
    markup = re.sub(r'<[^>]+>', ' ', markup)
    # Single pass over all entities, so "&amp;lt;" becomes "&lt;" and not "<".
    markup = html.unescape(markup)
    return re.sub(r'\s+', ' ', markup).strip()


def get_text_from_email(msg):
    """Extract plain text from an email message.

    For multipart messages every non-attachment ``text/plain`` part is
    concatenated; a ``text/html`` part is used (with tags stripped) only while
    no text has been collected yet. A single-part message is decoded directly
    and stripped of tags when its content type is ``text/html``.

    Args:
        msg: an ``email.message.Message``.

    Returns:
        At most 3000 characters of text, stripped of surrounding whitespace.
    """
    if msg.is_multipart():
        text = ""
        for part in msg.walk():
            if "attachment" in str(part.get("Content-Disposition", "")):
                continue
            ct = part.get_content_type()
            if ct == "text/plain":
                text += _decode_part(part)
            elif ct == "text/html" and not text:
                text += _html_to_text(_decode_part(part))
    else:
        text = _decode_part(msg)
        if msg.get_content_type() == "text/html":
            text = _html_to_text(text)

    # Trim to ~3000 chars for Claude
    return text[:3000].strip()

def summarize_with_claude(sender, subject, body):
    """Ask the Anthropic Messages API for a 2-3 sentence newsletter summary.

    Returns the text of the first content block of the reply, stripped. If the
    request or the parsing of the reply fails for any reason, returns a
    "(Summary unavailable: ...)" placeholder string instead of raising.
    """
    api_key = get_secret("anthropic-api-key.txt")

    prompt = f"""Summarize this newsletter in 2-3 sentences. Be direct and specific -- what's the actual content/news/insight? Skip generic descriptions.

From: {sender}
Subject: {subject}

Body:
{body}

Summary (2-3 sentences max):"""

    request_headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json"
    }
    request_body = {
        "model": "claude-haiku-4-5",
        "max_tokens": 200,
        "messages": [{"role": "user", "content": prompt}]
    }
    req = urllib.request.Request(
        "https://api.anthropic.com/v1/messages",
        data=json.dumps(request_body).encode(),
        headers=request_headers,
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            raw_reply = resp.read()
        reply = json.loads(raw_reply)
        first_block = reply["content"][0]
        return first_block["text"].strip()
    except Exception as e:
        return f"(Summary unavailable: {e})"

def send_telegram(message):
    """Send message to Mike's Telegram.

    The bot token is taken from ~/.openclaw/openclaw.json: the first entry of
    its "accounts" list whose "provider" is "telegram" and that has a
    non-empty "botToken" or "token". The message is sent to TELEGRAM_CHAT_ID
    with parse_mode "HTML".

    Best-effort: every failure (unreadable config, no token, HTTP error) is
    printed and the function returns None without raising.
    """
    config_path = Path.home() / ".openclaw/openclaw.json"
    token = None
    try:
        config = json.loads(config_path.read_text())
        for acct in config.get("accounts", []):
            if acct.get("provider") == "telegram":
                token = acct.get("botToken") or acct.get("token")
                if token:
                    break
    except (OSError, ValueError, AttributeError, TypeError) as e:
        # OSError: file missing/unreadable; ValueError: invalid JSON or bytes;
        # AttributeError/TypeError: config not shaped like {"accounts": [{...}]}.
        print(f"Could not read config: {e}")
        return

    if not token:
        print("No Telegram token found in config")
        return

    payload = json.dumps({
        "chat_id": TELEGRAM_CHAT_ID,
        "text": message,
        "parse_mode": "HTML"
    }).encode()
    req = urllib.request.Request(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data=payload,
        headers={"content-type": "application/json"}
    )
    try:
        # Only the status matters; urlopen raises on HTTP error responses.
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"Telegram send failed: {e}")

def fetch_newsletters(imap, limit=20):
    """Fetch unread newsletters from inbox, newest first.

    A message counts as a newsletter when it has a ``List-Unsubscribe`` or
    ``List-ID`` header. Only headers are fetched, with BODY.PEEK so the
    messages stay unread. At most the 200 newest unread messages are examined.

    Args:
        imap: a logged-in ``imaplib.IMAP4`` connection.
        limit: maximum number of newsletters to return.

    Returns:
        A list of dicts with keys "uid" (IMAP UID as str), "sender",
        "subject", "date" and "summary" (always None here).
    """
    imap.select("INBOX")
    # UID SEARCH, not SEARCH: plain SEARCH yields message sequence numbers,
    # which change between sessions. The values returned here are persisted
    # and later passed to UID STORE/COPY when archiving, so they must be UIDs.
    _, found = imap.uid('SEARCH', None, 'UNSEEN')
    all_uids = (found[0] or b"").split() if found else []

    newsletters = []

    # Check from newest first; cap the scan so a huge inbox does not stall us.
    for uid in reversed(all_uids[-200:]):
        if len(newsletters) >= limit:
            break

        _, data = imap.uid('FETCH', uid, '(BODY.PEEK[HEADER])')
        if not data or not isinstance(data[0], tuple):
            # No literal came back for this UID (e.g. it vanished meanwhile).
            continue
        msg = email.message_from_bytes(data[0][1])

        if not (msg.get("List-Unsubscribe") or msg.get("List-ID")):
            continue

        newsletters.append({
            "uid": uid.decode(),
            "sender": decode_str(msg.get("From", "")),
            "subject": decode_str(msg.get("Subject", "(no subject)")),
            "date": msg.get("Date", ""),
            "summary": None
        })

    return newsletters

def load_full_email(imap, uid):
    """Fetch and parse the complete message with IMAP UID *uid* (a str).

    Uses UID FETCH so it agrees with the UIDs produced by fetch_newsletters.
    NOTE(review): per RFC 3501, fetching RFC822 (unlike BODY.PEEK[]) sets the
    \\Seen flag, so a summarised message will not match the next UNSEEN
    search -- confirm this is intended.

    Raises:
        imaplib.IMAP4.error: if the server returned no message data.
    """
    _, data = imap.uid('FETCH', uid.encode(), '(RFC822)')
    if not data or not isinstance(data[0], tuple):
        raise imaplib.IMAP4.error(f"message UID {uid} could not be fetched")
    return email.message_from_bytes(data[0][1])

def archive_emails(uids):
    """Archive (Gmail-archive = remove from inbox label) emails by UID.

    For each UID the message is copied to "[Gmail]/All Mail" and, only if that
    succeeded, flagged \\Deleted in INBOX; a final EXPUNGE removes the flagged
    messages from the inbox.

    Args:
        uids: iterable of IMAP UIDs (str) valid for the INBOX mailbox.

    Returns:
        The number of messages the server accepted for archiving.
    """
    password = get_secret("gmail-app-password.txt")
    archived = 0

    # The context manager logs out even when a command raises.
    with imaplib.IMAP4_SSL("imap.gmail.com") as imap:
        imap.login(GMAIL_USER, password)
        imap.select("INBOX")

        for uid in uids:
            msg_uid = uid.encode()
            # imaplib does not quote arguments, and this mailbox name contains
            # a space, so the quotes must be part of the string.
            status, _ = imap.uid('COPY', msg_uid, '"[Gmail]/All Mail"')
            if status != 'OK':
                # Never flag a message for deletion unless the copy succeeded.
                print(f"Could not archive UID {uid}: COPY returned {status}")
                continue
            status, _ = imap.uid('STORE', msg_uid, '+FLAGS', '\\Deleted')
            if status == 'OK':
                archived += 1
            else:
                print(f"Could not archive UID {uid}: STORE returned {status}")

        imap.expunge()

    return archived

def generate_html(newsletters, generated_at):
    """Generate HTML digest page.

    Args:
        newsletters: list of dicts with "sender" and "subject" keys and an
            optional "summary"; a missing or empty summary is shown as
            "(no summary)". All three values are HTML-escaped.
        generated_at: ``datetime`` shown in the page header.

    Returns:
        The complete HTML document as a string. Entries are numbered from 1 in
        list order; an empty list produces a "No unread newsletters found."
        placeholder instead of entries.
    """
    cards = []
    for i, n in enumerate(newsletters, 1):
        sender_clean = html.escape(n["sender"])
        subject_clean = html.escape(n["subject"])
        summary_clean = html.escape(n.get("summary") or "(no summary)")

        cards.append(f"""
        <div class="newsletter" id="nl-{i}">
            <div class="nl-header">
                <span class="nl-num">{i}</span>
                <div class="nl-meta">
                    <div class="nl-subject">{subject_clean}</div>
                    <div class="nl-sender">{sender_clean}</div>
                </div>
            </div>
            <div class="nl-summary">{summary_clean}</div>
        </div>""")

    if newsletters:
        body_html = "".join(cards)
    else:
        body_html = '<div class="empty">No unread newsletters found.</div>'

    count = len(newsletters)
    # "%-d" / "%-I" are platform-specific strftime extensions, so the unpadded
    # day and 12-hour clock value are computed by hand instead.
    hour12 = generated_at.hour % 12 or 12
    ts = f"{generated_at:%A, %B} {generated_at.day} at {hour12}:{generated_at:%M %p}"

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Newsletter Digest</title>
<style>
  * {{ box-sizing: border-box; margin: 0; padding: 0; }}
  body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #f5f5f5; color: #222; }}
  .header {{ background: #1a1a2e; color: white; padding: 24px 32px; }}
  .header h1 {{ font-size: 22px; font-weight: 600; }}
  .header .meta {{ font-size: 13px; color: #aaa; margin-top: 4px; }}
  .instructions {{ background: #fff3cd; border-left: 4px solid #f5a623; padding: 12px 20px; font-size: 13px; color: #555; }}
  .container {{ max-width: 720px; margin: 0 auto; padding: 24px 16px; }}
  .newsletter {{ background: white; border-radius: 10px; padding: 16px 20px; margin-bottom: 14px; box-shadow: 0 1px 4px rgba(0,0,0,0.07); }}
  .nl-header {{ display: flex; align-items: flex-start; gap: 14px; margin-bottom: 10px; }}
  .nl-num {{ background: #1a1a2e; color: white; border-radius: 50%; width: 28px; height: 28px; display: flex; align-items: center; justify-content: center; font-size: 12px; font-weight: 700; flex-shrink: 0; margin-top: 2px; }}
  .nl-subject {{ font-weight: 600; font-size: 15px; line-height: 1.3; }}
  .nl-sender {{ font-size: 12px; color: #888; margin-top: 3px; }}
  .nl-summary {{ font-size: 14px; color: #444; line-height: 1.6; padding-left: 42px; }}
  .empty {{ text-align: center; color: #888; padding: 60px 20px; }}
</style>
</head>
<body>
<div class="header">
  <h1>Newsletter Digest</h1>
  <div class="meta">{count} newsletters &mdash; {ts}</div>
</div>
<div class="instructions">
  To archive, reply to Harvey with: <strong>archive 1 3 5</strong> or <strong>archive all</strong>
</div>
<div class="container">
  {body_html}
</div>
</body>
</html>"""

def load_state():
    """Return the JSON content of STATE_FILE, or an empty dict when the file
    does not exist."""
    if not STATE_FILE.exists():
        return {}
    raw_state = STATE_FILE.read_text()
    return json.loads(raw_state)

def save_state(data):
    """Write *data* to STATE_FILE as indented JSON.

    Missing parent directories are created first, so the first run works even
    when neither the workspace nor its "memory" subdirectory exists yet.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    # json.dumps escapes non-ASCII by default; the explicit encoding just
    # removes any dependence on the locale.
    STATE_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")

def _select_uids(targets, last_uids):
    """Map ``--archive`` arguments ("all" or 1-based numbers) to saved UIDs."""
    if targets == ["all"]:
        return list(last_uids)
    picked = []
    for token in targets:
        if not token.isdecimal():
            continue  # ignore anything that is not a plain number
        index = int(token) - 1
        # Skip out-of-range numbers and repeats ("--archive 2 2").
        if 0 <= index < len(last_uids) and last_uids[index] not in picked:
            picked.append(last_uids[index])
    return picked


def main():
    """Command-line entry point.

    With ``--archive``: archive the newsletters of the last digest selected by
    the following arguments ("all" or 1-based numbers), then exit. Exits with
    status 1 if no digest state is saved, 0 otherwise.

    Otherwise: fetch up to 15 unread newsletters, summarize each, write the
    HTML digest to OUTPUT_HTML, save the UIDs for a later ``--archive``, and
    send a Telegram notice unless ``--quiet`` is given or nothing was found.
    """
    args = sys.argv[1:]

    # Archive mode
    if "--archive" in args:
        targets = args[args.index("--archive") + 1:]
        last_uids = load_state().get("last_uids", [])

        if not last_uids:
            print("No digest state found. Run digest first.")
            sys.exit(1)

        to_archive = _select_uids(targets, last_uids)
        if not to_archive:
            print("Nothing to archive.")
            sys.exit(0)

        archived = archive_emails(to_archive)
        print(f"Archived {archived} newsletter(s).")
        sys.exit(0)

    quiet = "--quiet" in args
    limit = 15  # newsletters per digest

    password = get_secret("gmail-app-password.txt")
    # The context manager logs out even if fetching or summarizing raises.
    with imaplib.IMAP4_SSL("imap.gmail.com") as imap:
        imap.login(GMAIL_USER, password)

        print("Fetching newsletters...")
        newsletters = fetch_newsletters(imap, limit=limit)
        print(f"Found {len(newsletters)} newsletters. Summarizing...")

        for i, n in enumerate(newsletters, 1):
            print(f"  [{i}/{len(newsletters)}] {n['subject'][:50]}...")
            full_msg = load_full_email(imap, n["uid"])
            body = get_text_from_email(full_msg)
            n["summary"] = summarize_with_claude(n["sender"], n["subject"], body)

    uids = [n["uid"] for n in newsletters]

    now = datetime.now()
    html_content = generate_html(newsletters, now)
    # The page declares <meta charset="UTF-8">, so write it as UTF-8 no matter
    # what the locale's default encoding is.
    OUTPUT_HTML.write_text(html_content, encoding="utf-8")
    print(f"Digest saved: {OUTPUT_HTML}")

    # Save state for archiving
    save_state({"last_uids": uids, "generated_at": now.isoformat()})

    page_url = f"{PREVIEW_BASE}/newsletter-digest.html"

    if not quiet and newsletters:
        msg = (
            f"📰 <b>Newsletter Digest</b>\n"
            f"{len(newsletters)} newsletters summarized.\n\n"
            f"<a href='{page_url}'>Open digest</a>\n\n"
            f"Reply <b>archive 1 3 5</b> or <b>archive all</b> when done."
        )
        send_telegram(msg)
        print("Telegram notification sent.")
    elif not newsletters:
        print("No unread newsletters found.")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
