Newsletter Digest

#!/usr/bin/env python3
"""
Newsletter Digest — Harvey

Fetches unread newsletters from Gmail, summarizes with Claude, generates HTML digest.

Usage:
  python3 newsletter-digest.py                    # generate digest + notify via Telegram
  python3 newsletter-digest.py --quiet            # generate digest only, no Telegram
  python3 newsletter-digest.py --archive 1 3 5    # archive newsletters by number
  python3 newsletter-digest.py --archive all      # archive all from last digest
"""

import imaplib
import email
from email.header import decode_header
import json
import os
import sys
import re
import html
import urllib.request
import urllib.parse
from datetime import datetime
from pathlib import Path

# Config
GMAIL_USER = "mikeziarko@gmail.com"
SECRETS_DIR = Path.home() / ".openclaw/secrets"
WORKSPACE = Path.home() / ".openclaw/workspace"
OUTPUT_HTML = WORKSPACE / "newsletter-digest.html"
STATE_FILE = WORKSPACE / "memory" / "newsletter-state.json"
PREVIEW_BASE = "http://100.68.175.57:8765"
TELEGRAM_CHAT_ID = "8792051045"

# Maximum number of body characters forwarded to the summarizer.
_MAX_BODY_CHARS = 3000

# Patterns used by _html_to_text; compiled once at import time.
_SCRIPT_STYLE_RE = re.compile(
    r"<(script|style)\b[^>]*>.*?</\1\s*>", re.DOTALL | re.IGNORECASE
)
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")

# Stylesheet for the digest page, kept outside the f-string template so the
# CSS braces do not need escaping.
_DIGEST_CSS = """
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
       max-width: 760px; margin: 2rem auto; padding: 0 1rem;
       color: #222; background: #fafafa; }
h1 { margin-bottom: 0.2rem; }
.meta { color: #666; margin-top: 0; }
.hint { background: #eef3ff; padding: 0.6rem 0.8rem; border-radius: 6px; }
.item { display: flex; gap: 0.8rem; background: #fff; border: 1px solid #e3e3e3;
        border-radius: 8px; padding: 0.8rem 1rem; margin: 0.8rem 0; }
.num { font-weight: 700; color: #888; min-width: 1.6rem; }
.sender { color: #666; font-size: 0.85rem; }
.subject { font-weight: 600; margin: 0.15rem 0 0.4rem; }
.summary { line-height: 1.45; }
.empty { color: #666; font-style: italic; }
"""


def get_secret(name):
    """Return the stripped contents of the file *name* inside SECRETS_DIR."""
    return (SECRETS_DIR / name).read_text().strip()


def decode_str(s):
    """Decode an RFC 2047 encoded header value into a plain string.

    Returns "" for an empty or missing header. Byte chunks are decoded with
    their declared charset; an unknown charset falls back to UTF-8 with
    replacement characters instead of raising.
    """
    if not s:
        return ""
    pieces = []
    for part, charset in decode_header(s):
        if isinstance(part, bytes):
            try:
                pieces.append(part.decode(charset or "utf-8", errors="replace"))
            except LookupError:
                # The header declared a charset Python does not know.
                pieces.append(part.decode("utf-8", errors="replace"))
        else:
            pieces.append(part)
    return "".join(pieces)


def _decode_payload(part):
    """Return the decoded text payload of *part*, or "" when it has none."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        return payload.decode("utf-8", errors="replace")


def _html_to_text(raw_html):
    """Roughly convert an HTML document to whitespace-normalized plain text."""
    # Drop <script>/<style> elements together with their contents first, so
    # their source does not leak into the text once the tags are removed.
    text = _SCRIPT_STYLE_RE.sub(" ", raw_html)
    text = _TAG_RE.sub(" ", text)
    # Entities are decoded only after tag removal so that an escaped "&lt;"
    # is not mistaken for the start of a tag.
    text = html.unescape(text)
    return _WHITESPACE_RE.sub(" ", text).strip()


def get_text_from_email(msg):
    """Extract plain text from email message.

    For multipart messages every non-attachment ``text/plain`` part is
    concatenated; a ``text/html`` part is used (converted to text) only while
    no text has been collected yet. A single-part HTML message is converted
    to text as well. The result is trimmed to ``_MAX_BODY_CHARS`` characters.
    """
    text = ""
    if msg.is_multipart():
        for part in msg.walk():
            ct = part.get_content_type()
            cd = str(part.get("Content-Disposition", ""))
            if "attachment" in cd:
                continue
            if ct == "text/plain":
                text += _decode_payload(part)
            elif ct == "text/html" and not text:
                text += _html_to_text(_decode_payload(part))
    else:
        payload = msg.get_payload(decode=True)
        if payload is None:
            text = str(msg.get_payload())
        else:
            text = _decode_payload(msg)
            if msg.get_content_type() == "text/html":
                text = _html_to_text(text)
    # Trim to ~3000 chars for Claude
    return text[:_MAX_BODY_CHARS].strip()


def summarize_with_claude(sender, subject, body):
    """Call Claude API to summarize a newsletter.

    Returns the summary text, or a "(Summary unavailable: ...)" placeholder
    when the request or response parsing fails, so that one bad newsletter
    does not abort the whole digest.
    """
    api_key = get_secret("anthropic-api-key.txt")
    prompt = f"""Summarize this newsletter in 2-3 sentences. Be direct and specific -- what's the actual content/news/insight? Skip generic descriptions.

From: {sender}
Subject: {subject}

Body:
{body}

Summary (2-3 sentences max):"""
    payload = json.dumps({
        "model": "claude-haiku-4-5",
        "max_tokens": 200,
        "messages": [{"role": "user", "content": prompt}],
    }).encode()
    req = urllib.request.Request(
        "https://api.anthropic.com/v1/messages",
        data=payload,
        headers={
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        return data["content"][0]["text"].strip()
    except Exception as e:  # deliberate best-effort boundary
        return f"(Summary unavailable: {e})"


def send_telegram(message):
    """Send message to Mike's Telegram.

    The bot token is read from the first Telegram account in
    ``~/.openclaw/openclaw.json`` that carries a ``botToken`` or ``token``.
    Returns True when the message was handed to the Telegram API, False when
    no token could be found or the request failed (the reason is printed).
    """
    config_path = Path.home() / ".openclaw/openclaw.json"
    try:
        config = json.loads(config_path.read_text())
        token = None
        for acct in config.get("accounts", []):
            if acct.get("provider") == "telegram":
                token = acct.get("botToken") or acct.get("token")
                if token:
                    break
    except Exception as e:
        print(f"Could not read config: {e}")
        return False
    if not token:
        print("No Telegram token found in config")
        return False

    payload = json.dumps({
        "chat_id": TELEGRAM_CHAT_ID,
        "text": message,
        "parse_mode": "HTML",
    }).encode()
    req = urllib.request.Request(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data=payload,
        headers={"content-type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"Telegram send failed: {e}")
        return False
    return True


def fetch_newsletters(imap, limit=20, scan_limit=200):
    """Fetch unread newsletters from inbox.

    Scans unread INBOX messages newest-first and keeps those carrying a
    ``List-Unsubscribe`` or ``List-ID`` header. Stops after *limit* matches
    or after inspecting *scan_limit* messages, whichever comes first.

    Messages are addressed by IMAP UID (not sequence number) so that the
    identifiers stored in the state file remain valid for a later
    ``--archive`` run on a new connection.

    Returns a list of dicts with keys ``uid``, ``sender``, ``subject``,
    ``date`` and ``summary`` (initially None).
    """
    imap.select("INBOX")
    _, msgs = imap.uid("SEARCH", "UNSEEN")
    all_ids = (msgs[0] or b"").split() if msgs else []

    newsletters = []
    # Check from newest first
    for checked, uid in enumerate(reversed(all_ids), 1):
        if len(newsletters) >= limit or checked > scan_limit:
            break
        _, data = imap.uid("FETCH", uid, "(BODY.PEEK[HEADER])")
        if not data or not isinstance(data[0], tuple):
            # Message disappeared between SEARCH and FETCH.
            continue
        msg = email.message_from_bytes(data[0][1])
        if not (msg.get("List-Unsubscribe") or msg.get("List-ID")):
            continue
        newsletters.append({
            "uid": uid.decode(),
            "sender": decode_str(msg.get("From", "")),
            "subject": decode_str(msg.get("Subject", "(no subject)")),
            "date": msg.get("Date", ""),
            "summary": None,
        })
    return newsletters


def load_full_email(imap, uid):
    """Fetch the complete message with IMAP UID *uid* from the selected mailbox.

    Raises LookupError when the server returns no message for that UID.
    """
    _, data = imap.uid("FETCH", uid, "(RFC822)")
    if not data or not isinstance(data[0], tuple):
        raise LookupError(f"No message with UID {uid}")
    return email.message_from_bytes(data[0][1])


def archive_emails(uids):
    """Archive (Gmail-archive = remove from inbox label) emails by UID.

    Returns the number of UIDs processed.
    """
    if not uids:
        return 0
    password = get_secret("gmail-app-password.txt")
    with imaplib.IMAP4_SSL("imap.gmail.com") as imap:
        imap.login(GMAIL_USER, password)
        imap.select("INBOX")
        for uid in uids:
            # With Gmail's default IMAP settings, flagging a message as
            # deleted in INBOX and expunging only removes the Inbox label;
            # the message stays in All Mail, so no explicit COPY is issued.
            imap.uid("STORE", uid, "+FLAGS", "(\\Deleted)")
        imap.expunge()
    return len(uids)


def generate_html(newsletters, generated_at):
    """Generate HTML digest page.

    *newsletters* is the list produced by ``fetch_newsletters`` with the
    ``summary`` fields filled in; *generated_at* is the datetime shown in the
    page header. All message-derived text is HTML-escaped.
    """
    rows = []
    for i, n in enumerate(newsletters, 1):
        sender_clean = html.escape(n["sender"])
        subject_clean = html.escape(n["subject"])
        summary_clean = html.escape(n.get("summary") or "(no summary)")
        rows.append(f"""
<div class="item">
  <div class="num">{i}</div>
  <div class="content">
    <div class="sender">{sender_clean}</div>
    <div class="subject">{subject_clean}</div>
    <div class="summary">{summary_clean}</div>
  </div>
</div>
""")

    count = len(newsletters)
    plural = "" if count == 1 else "s"
    # Built by hand because the "%-d" / "%-I" strftime flags are not portable.
    hour12 = generated_at.hour % 12 or 12
    ts = (
        f"{generated_at.strftime('%A, %B')} {generated_at.day} "
        f"at {hour12}:{generated_at.strftime('%M %p')}"
    )
    if newsletters:
        body = "".join(rows)
    else:
        body = '<p class="empty">No unread newsletters found.</p>'

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Newsletter Digest</title>
<style>{_DIGEST_CSS}</style>
</head>
<body>
<h1>Newsletter Digest</h1>
<p class="meta">{count} newsletter{plural} &middot; {ts}</p>
<p class="hint">To archive, reply to Harvey with: <code>archive 1 3 5</code> or <code>archive all</code></p>
{body}
</body>
</html>
"""


def load_state():
    """Return the saved digest state, or {} when no state file exists."""
    if STATE_FILE.exists():
        return json.loads(STATE_FILE.read_text(encoding="utf-8"))
    return {}


def save_state(data):
    """Write *data* as JSON to STATE_FILE, creating parent directories."""
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    STATE_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")


def _select_archive_targets(targets, last_uids):
    """Map ``--archive`` arguments (1-based numbers or "all") to stored UIDs."""
    if targets == ["all"]:
        return list(last_uids)
    # isdecimal() only accepts characters that int() can parse.
    indexes = [int(x) - 1 for x in targets if x.isdecimal()]
    return [last_uids[n] for n in indexes if 0 <= n < len(last_uids)]


def main():
    """Command-line entry point: build a digest, or archive from the last one."""
    args = sys.argv[1:]

    # Archive mode
    if "--archive" in args:
        targets = args[args.index("--archive") + 1:]
        last_uids = load_state().get("last_uids", [])
        if not last_uids:
            print("No digest state found. Run digest first.")
            sys.exit(1)
        to_archive = _select_archive_targets(targets, last_uids)
        if not to_archive:
            print("Nothing to archive.")
            sys.exit(0)
        archived = archive_emails(to_archive)
        print(f"Archived {archived} newsletter(s).")
        sys.exit(0)

    quiet = "--quiet" in args
    limit = 15  # newsletters per digest

    password = get_secret("gmail-app-password.txt")
    uids = []
    # The context manager logs out even when fetching or summarizing fails.
    with imaplib.IMAP4_SSL("imap.gmail.com") as imap:
        imap.login(GMAIL_USER, password)

        print("Fetching newsletters...")
        newsletters = fetch_newsletters(imap, limit=limit)
        print(f"Found {len(newsletters)} newsletters. Summarizing...")

        for i, n in enumerate(newsletters, 1):
            print(f"  [{i}/{len(newsletters)}] {n['subject'][:50]}...")
            full_msg = load_full_email(imap, n["uid"])
            body = get_text_from_email(full_msg)
            n["summary"] = summarize_with_claude(n["sender"], n["subject"], body)
            uids.append(n["uid"])

    now = datetime.now()
    html_content = generate_html(newsletters, now)
    OUTPUT_HTML.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_HTML.write_text(html_content, encoding="utf-8")
    print(f"Digest saved: {OUTPUT_HTML}")

    # Save state for archiving
    save_state({"last_uids": uids, "generated_at": now.isoformat()})

    page_url = f"{PREVIEW_BASE}/newsletter-digest.html"

    if not quiet and newsletters:
        msg = (
            f"<b>📰 Newsletter Digest</b>\n"
            f"{len(newsletters)} newsletters summarized.\n\n"
            f"<a href=\"{page_url}\">Open digest</a>\n\n"
            f"Reply <code>archive 1 3 5</code> or <code>archive all</code> when done."
        )
        if send_telegram(msg):
            print("Telegram notification sent.")
    elif not newsletters:
        print("No unread newsletters found.")


if __name__ == "__main__":
    main()