#!/usr/bin/env python3
"""
Yelp Lead Monitor
=================
Scans Gmail for Yelp lead/message notification emails and alerts Mike on Telegram.
Designed to run via cron every 15–30 minutes.

State file: scripts/yelp-lead-state.json

SETUP:
  1. Ensure secrets are in place:
       ~/.openclaw/secrets/gmail-app-password.txt
       ~/.openclaw/secrets/telegram-bot-token.txt

  2. Test manually:
       python3 scripts/yelp-lead-monitor.py --dry-run

  3. Add to cron (every 20 minutes):
       */20 * * * * /usr/bin/python3 /Users/harvey/.openclaw/workspace/scripts/yelp-lead-monitor.py --check >> /tmp/yelp-lead-monitor.log 2>&1

USAGE:
  python3 scripts/yelp-lead-monitor.py [--check] [--dry-run] [--status] [--hours N]
"""

import imaplib
import email
import os
import sys
import json
import re
import socket

# Set a process-wide default socket timeout (in seconds) so IMAP doesn't hang:
# sockets created after this call without an explicit timeout inherit it.
socket.setdefaulttimeout(20)
import argparse
import urllib.request
import urllib.parse
from datetime import datetime, timedelta
from email.header import decode_header

# ── Config ────────────────────────────────────────────────────────────────────

# Gmail account that do_check() logs into over IMAP.
GMAIL_USER    = 'mikeziarko@gmail.com'
# Secret files read by load_secrets(); "~" is expanded at import time.
GMAIL_PWD_FILE = os.path.expanduser('~/.openclaw/secrets/gmail-app-password.txt')
BOT_TOKEN_FILE = os.path.expanduser('~/.openclaw/secrets/telegram-bot-token.txt')
# Telegram chat_id that send_telegram() posts alerts to.
CHAT_ID       = '8792051045'
# JSON file holding processed message IDs, last run time and lead count.
STATE_FILE    = os.path.expanduser('~/.openclaw/workspace/scripts/yelp-lead-state.json')

# Fallback token (same bot used by presale-monitor-gmail.py), used by
# load_secrets() when BOT_TOKEN_FILE does not exist.
# NOTE(review): this is a live credential hard-coded in source; consider
# rotating it and relying on BOT_TOKEN_FILE only.
FALLBACK_BOT_TOKEN = '7764678817:AAH1_A4woI2gKd13gSfRF_uAStowKxsKARg'

# IMAP folders scanned by do_check(), in order.
FOLDERS_TO_CHECK = ['INBOX']

# Yelp sender domains (each entry includes the leading "@"); matched against
# the From header by is_yelp_email().
YELP_DOMAINS = ['@yelp.com', '@email.yelp.com', '@biz.yelp.com']

# ── Helpers ───────────────────────────────────────────────────────────────────

def decode_header_value(value):
    """Decode an RFC 2047 encoded header value into a plain ``str``.

    Args:
        value: Raw header text, possibly containing ``=?charset?enc?data?=``
            encoded words, or a falsy value such as ``None`` / ``''``.

    Returns:
        The decoded header text; ``''`` when ``value`` is falsy. Bytes that
        cannot be decoded are replaced rather than raising.
    """
    if not value:
        return ''
    parts = []
    for part, charset in decode_header(value):
        if isinstance(part, bytes):
            try:
                parts.append(part.decode(charset or 'utf-8', errors='replace'))
            except LookupError:
                # The sender declared a charset Python has no codec for;
                # degrade to UTF-8 instead of failing the whole message.
                parts.append(part.decode('utf-8', errors='replace'))
        else:
            parts.append(str(part))
    return ''.join(parts)

def load_secrets():
    """Read the Gmail app password and the Telegram bot token from disk.

    Returns:
        A ``(gmail_password, bot_token)`` tuple. The token is read from
        ``BOT_TOKEN_FILE``; when that file does not exist,
        ``FALLBACK_BOT_TOKEN`` is used instead.

    Exits the process with status 1 (after printing to stderr) when the
    Gmail password file is missing.
    """
    try:
        # ``with`` guarantees the handle is closed; the original left it open.
        with open(GMAIL_PWD_FILE) as f:
            pwd = f.read().strip()
    except FileNotFoundError:
        print(f"ERROR: Gmail app password not found at {GMAIL_PWD_FILE}", file=sys.stderr)
        sys.exit(1)

    # Try to open directly instead of exists()-then-open, which can race.
    try:
        with open(BOT_TOKEN_FILE) as f:
            bot_token = f.read().strip()
    except FileNotFoundError:
        bot_token = FALLBACK_BOT_TOKEN

    return pwd, bot_token

def load_state():
    """Load the persisted monitor state from ``STATE_FILE``.

    Returns:
        The stored state dict, or a fresh default
        (``processed_ids`` empty, ``last_run`` None, ``total_leads`` 0) when
        the file is missing, unreadable, or does not contain a JSON object.
        Unreadable/corrupt files are reported on stderr, because falling back
        to an empty state means already-alerted leads will be alerted again.
    """
    default_state = {'processed_ids': [], 'last_run': None, 'total_leads': 0}
    try:
        with open(STATE_FILE) as f:
            state = json.load(f)
    except FileNotFoundError:
        # First run: no state yet, nothing to warn about.
        return default_state
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"WARNING: could not read state file {STATE_FILE}: {e}; starting fresh",
              file=sys.stderr)
        return default_state

    if not isinstance(state, dict):
        # Callers use state.get(...); any other JSON type would crash them.
        print(f"WARNING: state file {STATE_FILE} is not a JSON object; starting fresh",
              file=sys.stderr)
        return default_state
    return state

def save_state(state):
    """Persist ``state`` to ``STATE_FILE`` as indented JSON.

    The data is written to a sibling ``.tmp`` file first and then moved into
    place with ``os.replace``, so an interrupted run cannot leave a
    half-written state file behind. The parent directory is created if it
    does not exist yet.

    Args:
        state: JSON-serialisable dict to store.
    """
    state_dir = os.path.dirname(STATE_FILE)
    if state_dir:
        os.makedirs(state_dir, exist_ok=True)

    tmp_path = f"{STATE_FILE}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(state, f, indent=2)
    os.replace(tmp_path, STATE_FILE)

def send_telegram(message, bot_token, dry_run=False):
    """Send ``message`` to ``CHAT_ID`` through the Telegram Bot API.

    The message is posted with ``parse_mode=HTML``, so callers are
    responsible for HTML-escaping any dynamic text they embed.

    Args:
        message: Text to send.
        bot_token: Telegram bot token used to build the API URL.
        dry_run: When true, print the message instead of sending it.

    Returns:
        True when the message was sent (or printed in dry-run mode), False
        when the request failed; failures are reported on stderr.
    """
    if dry_run:
        print(f"[DRY RUN] Would send Telegram:\n{message}\n")
        return True
    url = f'https://api.telegram.org/bot{bot_token}/sendMessage'
    data = urllib.parse.urlencode({
        'chat_id': CHAT_ID,
        'text': message,
        'parse_mode': 'HTML',
    }).encode()
    req = urllib.request.Request(url, data=data)
    try:
        # Use the response as a context manager so the connection is closed.
        with urllib.request.urlopen(req, timeout=10):
            pass
        return True
    except Exception as e:
        print(f"Telegram error: {e}", file=sys.stderr)
        return False

def is_yelp_email(sender):
    """Return True when the From header's address ends with a Yelp domain.

    The address is taken from the trailing ``<...>`` of the header when
    present, otherwise the whole header is treated as the address. Matching
    on the address suffix (rather than anywhere in the header) prevents
    look-alikes such as ``x@yelp.com.evil.example`` or a display name that
    merely contains "@yelp.com" from being accepted.

    Args:
        sender: Decoded From header, e.g. ``'Yelp <no-reply@yelp.com>'``.
    """
    bracketed = re.search(r'<([^<>]*)>\s*$', sender)
    address = (bracketed.group(1) if bracketed else sender).strip().lower()
    return address.endswith(tuple(YELP_DOMAINS))

def get_message_id(msg):
    """Return a de-duplication key for ``msg``.

    Uses the stripped ``Message-ID`` header when it is present and non-empty;
    otherwise falls back to ``"<Date>-<Subject>"`` built from the raw headers.
    """
    header_id = msg.get('Message-ID', '')
    if not header_id:
        date_part = msg.get('Date', '')
        subject_part = msg.get('Subject', '')
        return f"{date_part}-{subject_part}"
    return header_id.strip()

# ── Email body parsing ────────────────────────────────────────────────────────

def get_body_text(msg_obj):
    """Return the message's plain-text body, or '' if none can be decoded.

    For multipart messages the first ``text/plain`` part that is not an
    attachment and decodes successfully wins. Non-multipart messages have
    their single payload decoded whatever its content type. Undecodable
    bytes are replaced, and any decoding error yields ''.
    """
    def _decoded(part):
        # Decode the transfer encoding first, then the declared charset.
        encoding = part.get_content_charset() or 'utf-8'
        return part.get_payload(decode=True).decode(encoding, errors='replace')

    if not msg_obj.is_multipart():
        try:
            return _decoded(msg_obj)
        except Exception:
            return ''

    for candidate in msg_obj.walk():
        if candidate.get_content_type() != 'text/plain':
            continue
        if 'attachment' in str(candidate.get('Content-Disposition', '')):
            continue
        try:
            return _decoded(candidate)
        except Exception:
            # Try the next text/plain part, if any.
            continue
    return ''

def parse_yelp_lead(subject, body):
    """Extract structured info from a Yelp lead email.

    Args:
        subject: Decoded Subject header.
        body: Message text to scan; may be empty.

    Returns:
        A dict with:
          customer_name: name found in the subject, else 'Unknown'.
          service: requested service from the body, else 'cleaning services'.
          message: the customer's message (at most 400 chars), else ''.
          lead_type: 'message', 'quote request', 'review' or 'lead'.
    """
    info = {
        'customer_name': 'Unknown',
        'service': 'cleaning services',
        'message': '',
        'lead_type': 'lead',
    }

    subject_lower = subject.lower()

    # Determine lead type from subject (first matching keyword wins).
    if 'message' in subject_lower:
        info['lead_type'] = 'message'
    elif 'quote' in subject_lower or 'request' in subject_lower:
        info['lead_type'] = 'quote request'
    elif 'review' in subject_lower:
        info['lead_type'] = 'review'
    elif 'lead' in subject_lower:
        info['lead_type'] = 'lead'

    # Extract customer name from subject.
    # Patterns: "John D. sent you a message", "New lead from Jane S."
    name_patterns = [
        r'^([A-Z][a-z]+(?: [A-Z][a-z.]+)?)\s+sent you',
        r'from\s+([A-Z][a-z]+(?: [A-Z][a-z.]+)?)',
        r'([A-Z][a-z]+(?: [A-Z][a-z.]+)?)\s+(?:is interested|wants|requested)',
    ]
    for pattern in name_patterns:
        m = re.search(pattern, subject)
        if m:
            info['customer_name'] = m.group(1).strip()
            break

    if not body:
        return info

    # Mail fetched over IMAP uses CRLF line endings, but the patterns below
    # look for "\n\n" paragraph breaks; normalise so they can actually match.
    body = body.replace('\r\n', '\n').replace('\r', '\n')

    # Extract customer message from body.
    # Yelp typically has the message block after "Message:" or similar.
    msg_patterns = [
        r'Message:\s*\n+([\s\S]{10,400}?)(?:\n\n|\Z)',
        r'(?:wrote|says|said):\s*\n+([\s\S]{10,400}?)(?:\n\n|\Z)',
        # The lazy "?" must follow the closing brace; inside the braces the
        # whole quantifier is parsed as literal text and never matches.
        r'"([\s\S]{10,400}?)"',
    ]
    for pattern in msg_patterns:
        m = re.search(pattern, body, re.IGNORECASE)
        if m:
            info['message'] = m.group(1).strip()[:500]
            break

    # Extract service requested.
    service_patterns = [
        r'Service:\s*(.+)',
        r'Looking for:\s*(.+)',
        r'Category:\s*(.+)',
        r'(?:home|house|apartment|office)\s+cleaning',
    ]
    for pattern in service_patterns:
        m = re.search(pattern, body, re.IGNORECASE)
        if m:
            # The last pattern has no capture group, so use the whole match.
            svc = m.group(1).strip() if m.lastindex else m.group(0).strip()
            info['service'] = svc[:100]
            break

    return info

def build_reply_template(customer_name, service, their_message):
    """Generate a friendly, professional reply template for No More Chores.

    Args:
        customer_name: Parsed customer name; 'Unknown', empty or
            whitespace-only values produce the greeting "Hi there!".
        service: Service the customer asked about; falls back to
            'cleaning services' when empty.
        their_message: The customer's message. Currently unused; kept so the
            signature stays stable for callers.

    Returns:
        The suggested reply as a single string.
    """
    # split() on an empty/whitespace name yields no tokens; guard against it
    # instead of indexing blindly.
    name_tokens = customer_name.split() if customer_name else []
    if name_tokens and customer_name != 'Unknown':
        first_name = name_tokens[0]
    else:
        first_name = 'there'
    service_ref = service or 'cleaning services'

    template = (
        f"Hi {first_name}! Thanks so much for reaching out to No More Chores. "
        f"We'd love to help you with {service_ref}! "
        f"We're a professional cleaning company serving Toronto, and we take pride in delivering spotless results. "
        f"Could you share a bit more about your space — size, frequency, and any specific areas of focus? "
        f"We can then put together a custom quote for you. Looking forward to hearing from you!"
    )
    return template

def build_telegram_message(subject, sender, date, lead_info):
    """Build the Telegram notification message (HTML parse mode).

    All dynamic text is HTML-escaped before being placed between the
    ``<b>``/``<i>`` tags: the message is sent with ``parse_mode=HTML``, and a
    stray ``<``, ``>`` or ``&`` in a subject or customer message would
    otherwise make Telegram reject the whole alert.

    Args:
        subject: Decoded email subject.
        sender: Decoded From header. Currently unused; kept for callers.
        date: Raw Date header of the email.
        lead_info: Dict from parse_yelp_lead() with keys ``customer_name``,
            ``service``, ``message`` and ``lead_type``.

    Returns:
        The newline-joined message text.
    """
    import html  # stdlib; imported locally because the file does not import it

    def esc(text):
        # quote=False: only &, < and > need escaping outside tag attributes.
        return html.escape(str(text), quote=False)

    name = lead_info['customer_name']
    svc = lead_info['service']
    msg = lead_info['message']
    lead_type = lead_info['lead_type']
    reply = build_reply_template(name, svc, msg)

    parts = [
        f"🧹 <b>New Yelp {esc(lead_type.title())}!</b>",
        "",
        f"<b>Customer:</b> {esc(name)}",
        f"<b>Looking for:</b> {esc(svc)}",
    ]
    if msg:
        # Truncate first, then escape, so an entity is never cut in half.
        parts += ["", "<b>Their message:</b>", esc(msg[:400])]

    parts += [
        "",
        "<b>Suggested reply:</b>",
        f"<i>{esc(reply)}</i>",
        "",
        f"📧 Subject: {esc(subject)}",
        f"🕐 {esc(date)}",
    ]
    return '\n'.join(parts)

# ── Main modes ────────────────────────────────────────────────────────────────

def _fetch_lead_body(mail, mid):
    """Return the text of message ``mid`` for lead parsing ('' on failure).

    The full message is fetched (without marking it read) and decoded through
    get_body_text(), so transfer encodings and MIME boundaries are resolved
    before the regexes run. When there is no usable text/plain part, the raw
    text after the header block is returned instead.
    """
    try:
        _, body_data = mail.fetch(mid, '(BODY.PEEK[])')
        raw_message = body_data[0][1]
        body = get_body_text(email.message_from_bytes(raw_message))
        if body:
            return body
        raw_text = raw_message.decode('utf-8', errors='replace')
        return raw_text.partition('\r\n\r\n')[2]
    except Exception as e:
        # Best effort: an alert without body details beats no alert at all.
        print(f"    Could not fetch body: {e}", file=sys.stderr)
        return ''


def do_check(hours, dry_run=False):
    """Scan Gmail for new Yelp emails and send one Telegram alert per lead.

    Args:
        hours: How far back to look. IMAP ``SINCE`` only has day resolution,
            so this is rounded down to the start of that day.
        dry_run: When true, alerts are printed instead of sent and the state
            file is left untouched, so a later real run still alerts.

    Returns:
        0 on completion. Exits the process with status 1 if the Gmail login
        fails.
    """
    gmail_pwd, bot_token = load_secrets()

    print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] Connecting to Gmail...")
    try:
        mail = imaplib.IMAP4_SSL('imap.gmail.com')
        mail.login(GMAIL_USER, gmail_pwd)
    except Exception as e:
        print(f"ERROR: Gmail login failed: {e}", file=sys.stderr)
        sys.exit(1)

    state = load_state()
    # Keep the stored order (oldest first) so the 5000-ID cap below drops the
    # oldest entries; the set is only used for fast membership checks.
    processed_list = list(state.get('processed_ids', []))
    processed_ids = set(processed_list)
    leads_found = 0
    alerts_sent = 0

    since = (datetime.now() - timedelta(hours=hours)).strftime('%d-%b-%Y')

    try:
        for folder in FOLDERS_TO_CHECK:
            try:
                status, _ = mail.select(folder, readonly=True)
                if status != 'OK':
                    print(f"  Skipping {folder} (could not select)")
                    continue
            except Exception as e:
                print(f"  Skipping {folder}: {e}", file=sys.stderr)
                continue

            try:
                status, data = mail.search(None, f'SINCE {since}')
            except Exception as e:
                print(f"  Search error in {folder}: {e}", file=sys.stderr)
                continue

            if status != 'OK' or not data[0]:
                print(f"  {folder}: 0 emails in last {hours}h")
                continue

            ids = data[0].split()
            print(f"  {folder}: {len(ids)} emails to scan")

            for mid in ids:
                try:
                    # Fetch only the headers first; most mail is not from Yelp.
                    _, hdr_data = mail.fetch(mid, '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT DATE MESSAGE-ID)])')
                    raw_hdr = hdr_data[0][1].decode('utf-8', errors='replace')
                    hdr_msg = email.message_from_string(raw_hdr)

                    sender  = decode_header_value(hdr_msg.get('From', ''))
                    subject = decode_header_value(hdr_msg.get('Subject', ''))
                    date    = hdr_msg.get('Date', '')
                    msg_id  = get_message_id(hdr_msg)

                    if not is_yelp_email(sender):
                        continue

                    if msg_id in processed_ids:
                        continue

                    leads_found += 1
                    print(f"  → Yelp email: {subject[:60]}")

                    body = _fetch_lead_body(mail, mid)
                    lead_info = parse_yelp_lead(subject, body)
                    tg_message = build_telegram_message(subject, sender, date, lead_info)

                    if send_telegram(tg_message, bot_token, dry_run=dry_run):
                        alerts_sent += 1
                        # Record immediately so the same message seen again in
                        # another folder during this run is not alerted twice.
                        processed_ids.add(msg_id)
                        processed_list.append(msg_id)
                        print(f"    ✓ Alert sent for: {lead_info['customer_name']}")
                    else:
                        # Not recorded, so the next run retries this lead.
                        print("    ✗ Telegram send failed")

                except Exception as e:
                    print(f"    Error processing message: {e}", file=sys.stderr)
    finally:
        # Always release the IMAP session, even on an unexpected error.
        try:
            mail.logout()
        except Exception:
            pass

    if not dry_run:
        # Save state (cap at 5000 IDs). Only alerts that were actually
        # delivered are counted, so retried leads are not counted twice.
        state['processed_ids'] = processed_list[-5000:]
        state['last_run'] = datetime.now().isoformat()
        state['total_leads'] = state.get('total_leads', 0) + alerts_sent
        save_state(state)

    mode_label = '[DRY RUN] ' if dry_run else ''
    print(f"\n{mode_label}Done. Yelp emails found: {leads_found} | Alerts sent: {alerts_sent}")
    return 0

def do_status():
    """Print the last run time, total lead count and number of tracked IDs.

    Returns:
        0, used by main() as the process exit status.
    """
    state = load_state()
    # The default state stores last_run as None, so a .get() default would
    # never apply; ``or`` covers both the missing key and the None value.
    last_run = state.get('last_run') or 'Never'
    total = state.get('total_leads', 0)
    processed = len(state.get('processed_ids', []))
    print(f"Last run:       {last_run}")
    print(f"Total leads:    {total}")
    print(f"Tracked emails: {processed}")
    return 0

# ── Entry point ───────────────────────────────────────────────────────────────

def main():
    """Parse the command line and dispatch to the requested mode.

    ``--status`` takes precedence over everything else. Otherwise a check is
    run, with ``--dry-run`` deciding whether Telegram messages are really
    sent. ``--check`` is accepted but is simply the default behaviour.

    Returns:
        The exit status produced by the selected mode.
    """
    cli = argparse.ArgumentParser(description='Monitor Gmail for Yelp leads and alert via Telegram.')
    cli.add_argument('--check',   action='store_true', help='Check Gmail and send alerts (default)')
    cli.add_argument('--dry-run', action='store_true', help='Check but do not send Telegram messages')
    cli.add_argument('--status',  action='store_true', help='Show last run time and lead count')
    cli.add_argument('--hours',   type=int, default=24, help='How many hours back to scan (default: 24)')
    options = cli.parse_args()

    if options.status:
        return do_status()

    # Everything else is a check; only the dry-run switch differs.
    return do_check(options.hours, dry_run=options.dry_run)

# Run the CLI only when executed as a script (not on import); main()'s return
# value becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
