#!/usr/bin/env python3
"""
Gmail Label Triage — Harvey
Categorizes inbox emails (read and unread), applies Gmail labels, and archives non-Action emails.
Run once for backlog or scheduled via cron for daily cleanup.
"""
import imaplib
import email
from email.header import decode_header
import json
import re
import sys
import urllib.request
from pathlib import Path

# Account that gets triaged, and the directory holding one secret per file.
GMAIL_USER = "mikeziarko@gmail.com"
SECRETS_DIR = Path.home().joinpath(".openclaw/secrets")

# Category key -> Gmail label applied to messages in that category.
LABELS = dict(
    action="Harvey/Action",
    read="Harvey/Read",
    receipt="Harvey/Receipt",
    newsletter="Harvey/Newsletter",
    junk="Harvey/Junk",
)

# Categories to archive out of inbox (non-Action)
ARCHIVE_CATS = {category for category in LABELS if category != "action"}

def get_secret(name):
    """Return the text of the secret file *name* under SECRETS_DIR, whitespace-stripped."""
    secret_file = SECRETS_DIR / name
    contents = secret_file.read_text()
    return contents.strip()

def decode_str(s):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        s: Raw header value (may be empty or None).

    Returns:
        The decoded text; "" when *s* is falsy. Undecodable bytes are
        replaced rather than raising.
    """
    if not s:
        return ""
    chunks = []
    for part, charset in decode_header(s):
        if not isinstance(part, bytes):
            chunks.append(part)
            continue
        try:
            chunks.append(part.decode(charset or "utf-8", errors="replace"))
        except LookupError:
            # The header named a charset Python has no codec for; one odd
            # message must not abort the whole run, so fall back to UTF-8.
            chunks.append(part.decode("utf-8", errors="replace"))
    return "".join(chunks)

def get_imap():
    """Connect to Gmail over IMAP/SSL, log in as GMAIL_USER, and return the connection."""
    connection = imaplib.IMAP4_SSL("imap.gmail.com")
    app_password = get_secret("gmail-app-password.txt")
    connection.login(GMAIL_USER, app_password)
    return connection

def ensure_labels(imap):
    """Create every label in LABELS on the server, best-effort.

    Args:
        imap: A logged-in imaplib connection.

    A failure for one label is reported and skipped so the remaining labels
    are still attempted.
    """
    for label in LABELS.values():
        try:
            imap.create(label)
        except (imaplib.IMAP4.error, OSError) as exc:
            # Only protocol/socket failures are tolerated here; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            print(f"  Label create error for {label}: {exc}")

def apply_label(imap, uid, label):
    """Add the Gmail label *label* to the message with the given UID."""
    quoted_label = f'"{label}"'
    imap.uid('STORE', uid, '+X-GM-LABELS', quoted_label)

def archive_uids(imap, uids):
    """Archive a list of UIDs by copying to All Mail and deleting from inbox.

    Args:
        imap: A logged-in imaplib connection with the source mailbox selected.
        uids: Message UIDs as bytes.

    Returns:
        The number of messages whose batch was copied and flagged for deletion.
    """
    if not uids:
        return 0
    archived = 0
    for i in range(0, len(uids), 50):
        batch = uids[i:i+50]
        id_str = b",".join(batch)
        # The identifiers are UIDs, so the UID variants of COPY/STORE must be
        # used; the plain commands would read them as sequence numbers and
        # act on different messages.
        result = imap.uid('COPY', id_str, '"[Gmail]/All Mail"')
        if result[0] != 'OK':
            # Never flag messages for deletion unless the copy succeeded.
            continue
        imap.uid('STORE', id_str, '+FLAGS', '\\Deleted')
        archived += len(batch)
    imap.expunge()
    return archived

def claude_categorize_batch(emails_meta):
    """Ask the Anthropic Messages API to categorize a batch of emails.

    Args:
        emails_meta: Sequence of dicts with "sender", "subject" and "snippet"
            string values.

    Returns:
        A dict mapping the 1-based position in *emails_meta* (as a string) to
        a lowercase category string. Returns {} for an empty batch, and also
        when the request fails or the reply holds no parseable JSON object
        (the error is printed).
    """
    # Nothing to classify: skip the secret read and the network round trip.
    if not emails_meta:
        return {}

    api_key = get_secret("anthropic-api-key.txt")
    lines = "\n".join(
        f'{i+1}. From: {e["sender"][:60]} | Subject: {e["subject"][:80]} | Preview: {e["snippet"][:100]}'
        for i, e in enumerate(emails_meta)
    )
    prompt = f"""Categorize each email into exactly one category:
- action: needs a reply or decision (customer issues, team questions, vendor problems)
- read: informational, no reply needed (alerts, updates, notifications)
- receipt: payment confirmations, invoices, order confirmations
- newsletter: marketing emails, newsletters, promotional content
- junk: spam, cold outreach, sweepstakes, irrelevant promotions

Reply ONLY with JSON mapping line number to category. Example: {{"1":"action","2":"receipt"}}

Emails:
{lines}

JSON:"""

    payload = json.dumps({
        "model": "claude-haiku-4-5",
        "max_tokens": 600,
        "messages": [{"role": "user", "content": prompt}]
    }).encode()
    req = urllib.request.Request(
        "https://api.anthropic.com/v1/messages",
        data=payload,
        headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=20) as r:
            result = json.loads(r.read())["content"][0]["text"].strip()
        # Pull out the first flat {...} object in case the reply wraps the
        # JSON in extra text.
        m = re.search(r'\{[^{}]+\}', result, re.DOTALL)
        if m:
            parsed = json.loads(m.group())
            # Normalize so callers can compare against lowercase category keys
            # and never receive an unhashable value (e.g. a list).
            return {str(k): str(v).strip().lower() for k, v in parsed.items()}
    except Exception as e:
        # Best-effort boundary: any network/parse failure yields "no answer".
        print(f"  Claude error: {e}")
    return {}

def _escape_markdown(text):
    """Backslash-escape the characters Telegram's legacy Markdown treats as markup."""
    return re.sub(r'([_*`\[])', r'\\\1', text)


def send_telegram_summary(action_emails):
    """Send a Telegram summary of Action items to Mike.

    Args:
        action_emails: List of dicts with "sender" and "subject" strings.
            An empty list sends an "inbox is clean" message instead.

    Does nothing when the bot token cannot be read. Send failures are printed,
    not raised.
    """
    try:
        token = get_secret("telegram-bot-token.txt")
    except (OSError, ValueError):
        return  # No readable token, skip silently

    chat_id = "8792051045"
    if not action_emails:
        text = "✅ Gmail triage complete -- inbox is clean, no new Action items."
    else:
        lines = [f"📬 *Gmail Triage* -- {len(action_emails)} Action items need your attention:\n"]
        for e in action_emails[:10]:
            # Truncate first, then escape, so a cut never splits an escape
            # sequence. Unescaped markup in sender/subject would make Telegram
            # reject the whole message under parse_mode=Markdown.
            sender = _escape_markdown(e['sender'].split('<')[0].strip()[:30])
            subject = _escape_markdown(e['subject'][:50])
            lines.append(f"• {sender}: {subject}")
        if len(action_emails) > 10:
            lines.append(f"...and {len(action_emails) - 10} more")
        text = "\n".join(lines)

    payload = json.dumps({
        "chat_id": chat_id,
        "text": text,
        "parse_mode": "Markdown"
    }).encode()
    req = urllib.request.Request(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data=payload,
        headers={"content-type": "application/json"}
    )
    try:
        # The with-block closes the HTTP response instead of leaking it.
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"  Telegram error: {e}")

def _search_uids(imap, criteria):
    """Run a UID SEARCH on the selected mailbox and return the matching UIDs as bytes."""
    _, data = imap.uid('SEARCH', None, criteria)
    # Guard against an empty or missing first element when nothing matches.
    return data[0].split() if data and data[0] else []


def _fetch_headers(imap, uids):
    """Fetch sender, subject and List-Unsubscribe for each UID, newest first."""
    emails = []
    for uid in reversed(uids):  # newest first
        _, data = imap.uid('FETCH', uid, '(BODY.PEEK[HEADER])')
        # Skip a message whose header bytes did not come back rather than
        # crashing the whole run on data[0][1].
        if not data or not isinstance(data[0], tuple):
            print(f"  Fetch error for {uid.decode()}: no header data returned")
            continue
        msg = email.message_from_bytes(data[0][1])
        emails.append({
            "uid":     uid.decode(),
            "sender":  decode_str(msg.get("From", "")),
            "subject": decode_str(msg.get("Subject", "(no subject)")),
            "snippet": "",
            "unsub":   msg.get("List-Unsubscribe", ""),
        })
    return emails


def _categorize(emails, batch_size=25):
    """Return {uid: category} for every email whose batch was categorized."""
    total = len(emails)
    results = {}
    for i in range(0, total, batch_size):
        batch = emails[i:i+batch_size]
        print(f"  Categorizing {i+1}-{min(i+batch_size, total)} of {total}...")
        mapping = claude_categorize_batch(batch)
        if not mapping:
            # An empty mapping means the categorizer gave no answer. Filing the
            # whole batch as "read" would archive mail nobody classified, so
            # leave it in the inbox for the next run.
            print("  No categories returned for this batch; leaving it in the inbox.")
            continue
        for j, e in enumerate(batch, start=1):
            cat = mapping.get(str(j), "read")
            if not isinstance(cat, str) or cat not in LABELS:
                cat = "read"
            # Bulk mail carries List-Unsubscribe; treat it as a newsletter
            # unless it was classified as action or receipt.
            if e["unsub"] and cat not in ("action", "receipt"):
                cat = "newsletter"
            results[e["uid"]] = cat
    return results


def main():
    """Triage the Gmail inbox: categorize, label, and archive non-Action mail.

    Processes every inbox message (read and unread) that does not already
    carry the Action label. Pass ``--notify`` on the command line to also send
    a Telegram summary of the new Action items.
    """
    notify = "--notify" in sys.argv  # Pass --notify flag for cron runs

    # The context manager logs out even when a step below raises.
    with get_imap() as imap:
        imap.select("INBOX")

        ensure_labels(imap)

        # All inbox emails (read + unread), minus those already labelled
        # Action (already processed).
        all_uids = _search_uids(imap, 'ALL')
        action_uids_set = set(_search_uids(imap, f'X-GM-LABELS "{LABELS["action"]}"'))
        all_uids = [uid for uid in all_uids if uid not in action_uids_set]

        total = len(all_uids)

        if total == 0:
            print("Inbox is clean — nothing to process.")
            if notify:
                send_telegram_summary([])
            return

        print(f"Found {total} emails to process (read + unread). Categorizing in batches...")

        emails = _fetch_headers(imap, all_uids)
        results = _categorize(emails)

        # Apply labels
        print(f"\nApplying labels to {len(results)} emails...")
        label_counts = {c: 0 for c in LABELS}
        action_emails = []
        archive_uids_list = []

        for e in emails:
            uid = e["uid"]
            if uid not in results:
                continue  # batch was not categorized; leave untouched
            cat = results[uid]
            try:
                apply_label(imap, uid.encode(), LABELS[cat])
            except Exception as ex:
                print(f"  Label error for {uid}: {ex}")
                continue
            label_counts[cat] += 1
            if cat == "action":
                action_emails.append(e)
            else:
                archive_uids_list.append(uid.encode())

        # Refresh the mailbox selection. Archiving below uses UID commands, so
        # the UIDs collected above are passed through unchanged.
        imap.select("INBOX")
        archived = 0
        if archive_uids_list:
            print(f"\nArchiving {len(archive_uids_list)} non-Action emails...")
            archived = archive_uids_list_by_seq(imap, archive_uids_list)

    print("\nDone!")
    print(f"  Action (kept in inbox): {label_counts['action']}")
    print(f"  Archived: {archived}")
    for cat in ["read", "receipt", "newsletter", "junk"]:
        print(f"    {LABELS[cat]}: {label_counts[cat]}")
    uncategorized = len(emails) - len(results)
    if uncategorized:
        print(f"  Left in inbox (not categorized): {uncategorized}")

    if notify:
        send_telegram_summary(action_emails)


def archive_uids_list_by_seq(imap, uid_list):
    """Archive messages by UID, in chunks of 50.

    Despite the ``_by_seq`` suffix, every command goes through ``imap.uid``.
    Each chunk is copied to "[Gmail]/All Mail"; only when that copy reports
    'OK' is the chunk flagged \\Deleted and counted. A single expunge runs
    after all chunks.

    Returns the number of UIDs in successfully copied chunks (0 for an empty
    list, in which case no IMAP command is sent).
    """
    archived_count = 0
    if uid_list:
        chunk_size = 50
        for start in range(0, len(uid_list), chunk_size):
            chunk = uid_list[start:start + chunk_size]
            uid_set = b",".join(chunk)
            response = imap.uid('COPY', uid_set, '"[Gmail]/All Mail"')
            if response[0] != 'OK':
                continue
            imap.uid('STORE', uid_set, '+FLAGS', '\\Deleted')
            archived_count += len(chunk)
        imap.expunge()
    return archived_count


# Run the triage only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
