#!/usr/bin/env python3
"""
Pull Google Meet transcript from Drive after a call.
Usage: python3 pull-meet-transcript.py [--hours 2] [--keyword "Kristi"]
Looks for Meet transcript files created in the last N hours.
"""

import sys
import json
import time
import urllib.request
import urllib.parse
import urllib.error
import argparse
import base64
import hmac
import hashlib
import struct
import textwrap
from datetime import datetime, timezone, timedelta

# Path to the service-account JSON key file; load_sa_key() reads and parses it.
SA_KEY_FILE = "/Users/harvey/.openclaw/secrets/google-calendar-sa.json"
# Account passed as the JWT "sub" claim when main() requests the access token.
# NOTE(review): impersonation via "sub" normally relies on Workspace domain-wide
# delegation — confirm this works for a consumer gmail.com address.
DELEGATED_USER = "mikeziarko@gmail.com"

# OAuth scopes requested for the access token (space-joined into the JWT "scope" claim).
SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly"
]

def load_sa_key():
    """Load the service-account credentials from ``SA_KEY_FILE``.

    Returns:
        The parsed JSON content of the key file, as produced by ``json.load``.

    Raises:
        OSError: If the key file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
    with open(SA_KEY_FILE, encoding="utf-8") as f:
        return json.load(f)

def b64url(data):
    """Return the URL-safe base64 encoding of *data* with trailing ``=`` padding removed.

    Args:
        data: ``bytes`` to encode, or a ``str`` which is first encoded with the
            default ``str.encode()`` codec (UTF-8).

    Returns:
        The encoded value as a ``str``.
    """
    raw = data.encode() if isinstance(data, str) else data
    encoded_text = base64.urlsafe_b64encode(raw).decode()
    # JWT segments are written without base64 padding.
    return encoded_text.rstrip("=")

def make_jwt(sa, scopes, sub=None):
    """Build and sign an RS256 JWT assertion for Google's OAuth token endpoint.

    Signing is delegated to the ``openssl`` command-line tool so that no
    third-party cryptography package is required.

    Args:
        sa: Parsed service-account key; ``client_email`` and ``private_key``
            are read from it.
        scopes: Iterable of OAuth scope strings, space-joined into the
            ``scope`` claim.
        sub: Optional account to put in the ``sub`` claim; omitted when falsy.

    Returns:
        The compact JWT string ``header.payload.signature``.

    Raises:
        RuntimeError: If ``openssl`` exits with a non-zero status or produces
            no signature (for example, when the private key is malformed).
        OSError: If the ``openssl`` executable cannot be started.
    """
    # Function-scope imports: only this function needs these modules.
    import os
    import subprocess
    import tempfile

    now = int(time.time())
    header = b64url(json.dumps({"alg": "RS256", "typ": "JWT"}))
    payload = {
        "iss": sa["client_email"],
        "scope": " ".join(scopes),
        "aud": "https://oauth2.googleapis.com/token",
        "iat": now,
        "exp": now + 3600,  # assertion is valid for one hour
    }
    if sub:
        payload["sub"] = sub
    payload_b64 = b64url(json.dumps(payload))
    signing_input = f"{header}.{payload_b64}".encode()

    key_path = None
    try:
        # openssl needs the key as a file. The temp file is created inside the
        # try block so it is removed even if writing the key fails.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".pem", delete=False) as key_file:
            key_path = key_file.name
            key_file.write(sa["private_key"])
        # With no input file argument, `openssl dgst` reads the data from stdin,
        # so the signing input never has to touch the disk.
        result = subprocess.run(
            ["openssl", "dgst", "-sha256", "-sign", key_path],
            input=signing_input,
            capture_output=True,
        )
    finally:
        if key_path is not None:
            os.unlink(key_path)

    # Fail loudly instead of returning a JWT with an empty signature.
    if result.returncode != 0 or not result.stdout:
        detail = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"openssl failed to sign the JWT (exit status {result.returncode}): {detail}"
        )

    signature = b64url(result.stdout)
    return f"{header}.{payload_b64}.{signature}"

def get_access_token(sa, scopes, sub=None):
    """Exchange a signed JWT assertion for an OAuth 2.0 access token.

    Args:
        sa: Parsed service-account key, forwarded to ``make_jwt``.
        scopes: OAuth scope strings, forwarded to ``make_jwt``.
        sub: Optional account for the ``sub`` claim, forwarded to ``make_jwt``.

    Returns:
        The ``access_token`` value from the token endpoint's JSON response.
    """
    form_fields = {
        "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
        "assertion": make_jwt(sa, scopes, sub),
    }
    token_request = urllib.request.Request(
        "https://oauth2.googleapis.com/token",
        data=urllib.parse.urlencode(form_fields).encode(),
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    with urllib.request.urlopen(token_request) as response:
        token_response = json.loads(response.read())
    return token_response["access_token"]

def drive_list_files(token, query, page_token=None):
    """Run a Drive v3 ``files.list`` request and return the decoded JSON response.

    Args:
        token: OAuth access token sent as a Bearer credential.
        query: Drive search expression, sent as the ``q`` parameter.
        page_token: Optional continuation token; sent as ``pageToken`` when truthy.

    Returns:
        The parsed JSON response. The request asks for at most 20 files,
        ordered by ``createdTime`` descending, plus ``nextPageToken``.
    """
    query_pairs = [
        ("q", query),
        ("fields", "files(id,name,createdTime,mimeType,webViewLink),nextPageToken"),
        ("orderBy", "createdTime desc"),
        ("pageSize", "20"),
    ]
    if page_token:
        query_pairs.append(("pageToken", page_token))

    endpoint = "https://www.googleapis.com/drive/v3/files?" + urllib.parse.urlencode(query_pairs)
    auth_headers = {"Authorization": f"Bearer {token}"}
    list_request = urllib.request.Request(endpoint, headers=auth_headers)
    with urllib.request.urlopen(list_request) as response:
        body = response.read()
    return json.loads(body)

def drive_export_doc(token, file_id, mime_type="text/plain"):
    """Export a Drive file through the v3 ``export`` endpoint and return it as text.

    Args:
        token: OAuth access token sent as a Bearer credential.
        file_id: ID of the Drive file to export.
        mime_type: Target MIME type for the export (default ``text/plain``).

    Returns:
        The response body decoded as UTF-8, with undecodable bytes replaced.
    """
    encoded_mime = urllib.parse.quote(mime_type)
    export_url = f"https://www.googleapis.com/drive/v3/files/{file_id}/export?mimeType={encoded_mime}"
    auth_headers = {"Authorization": f"Bearer {token}"}
    export_request = urllib.request.Request(export_url, headers=auth_headers)
    with urllib.request.urlopen(export_request) as response:
        raw = response.read()
    return raw.decode("utf-8", errors="replace")

def _escape_drive_query_value(value):
    """Escape backslashes and single quotes for a single-quoted Drive query literal."""
    return value.replace("\\", "\\\\").replace("'", "\\'")


def _download_file_text(token, file_id):
    """Download a file's raw content via ``alt=media`` and decode it as UTF-8 text."""
    url = f"https://www.googleapis.com/drive/v3/files/{file_id}?alt=media"
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    with urllib.request.urlopen(req) as r:
        return r.read().decode("utf-8", errors="replace")


def main():
    """Find the newest Meet transcript in Drive, preview it, and optionally save it.

    Command-line options:
        --hours N     Look back N hours (float, default 3).
        --keyword K   Additionally require K in the file name.
        --save PATH   Write the full transcript, preceded by a short header, to PATH.

    The first 3000 characters of the transcript are printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--hours", type=float, default=3, help="Look back N hours")
    parser.add_argument("--keyword", type=str, default="", help="Filter by filename keyword")
    parser.add_argument("--save", type=str, default="", help="Save transcript to this file path")
    args = parser.parse_args()

    sa = load_sa_key()
    print(f"Getting access token for {DELEGATED_USER}...")
    token = get_access_token(sa, SCOPES, sub=DELEGATED_USER)
    print("Token obtained.")

    # Calculate cutoff time (UTC, no zone suffix in the formatted string)
    cutoff = datetime.now(timezone.utc) - timedelta(hours=args.hours)
    cutoff_str = cutoff.strftime("%Y-%m-%dT%H:%M:%S")

    # Search for Meet transcripts - they show up as Google Docs with "transcript" in name
    # or in a "Meet Recordings" folder
    query_parts = [
        f"createdTime > '{cutoff_str}'",
        "(name contains 'transcript' or name contains 'Transcript' or name contains 'Meet')",
        "trashed = false"
    ]
    if args.keyword:
        # The keyword is user input placed inside a quoted literal; an unescaped
        # apostrophe (e.g. "Kristi's") would otherwise produce an invalid query.
        keyword = _escape_drive_query_value(args.keyword)
        query_parts.append(f"name contains '{keyword}'")

    query = " and ".join(query_parts)
    print(f"\nSearching Drive for transcripts (last {args.hours}h)...")
    print(f"Query: {query}\n")

    result = drive_list_files(token, query)
    files = result.get("files", [])

    if not files:
        # Broader search: any recent Google Doc, filtered client-side by name.
        # NOTE(review): this fallback does not apply --keyword — confirm that is intended.
        print("No results with tight query. Trying broader search...")
        broad_query = f"createdTime > '{cutoff_str}' and trashed = false and (mimeType = 'application/vnd.google-apps.document')"
        result = drive_list_files(token, broad_query)
        files = result.get("files", [])
        # Filter client-side for Meet-related
        files = [f for f in files if any(k in f.get("name","").lower() for k in ["meet", "transcript", "recording", "call"])]

    if not files:
        print("No Meet transcripts found in the last", args.hours, "hours.")
        print("Make sure transcription was enabled in the call.")
        return

    print(f"Found {len(files)} file(s):\n")
    for f in files:
        print(f"  [{f['createdTime']}] {f['name']}")
        print(f"  ID: {f['id']}")
        print(f"  Link: {f.get('webViewLink','')}")
        print()

    # Pull the most recent one (results are requested in createdTime-descending order)
    latest = files[0]
    print(f"Pulling transcript: {latest['name']}...")

    if latest.get("mimeType") == "application/vnd.google-apps.document":
        # Google Docs have no raw bytes to download; they must be exported.
        content = drive_export_doc(token, latest["id"])
    else:
        # Try plain download
        content = _download_file_text(token, latest["id"])

    print("\n" + "="*60)
    print(f"TRANSCRIPT: {latest['name']}")
    print(f"Created: {latest['createdTime']}")
    print("="*60 + "\n")
    # Print first 3000 chars as preview
    preview = content[:3000]
    print(preview)
    if len(content) > 3000:
        print(f"\n... [{len(content) - 3000} more characters]")

    if args.save:
        # Transcripts may contain non-ASCII text; write UTF-8 regardless of locale.
        with open(args.save, "w", encoding="utf-8") as out:
            out.write(f"# Transcript: {latest['name']}\n")
            out.write(f"Created: {latest['createdTime']}\n")
            out.write(f"Link: {latest.get('webViewLink','')}\n\n")
            out.write(content)
        print(f"\nSaved to: {args.save}")

# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
