#!/usr/bin/env python3
"""
Harvey agent backup script.
Collects critical config/memory files, scrubs secrets, commits and pushes to GitHub.
"""

import os
import re
import sys
import json
import shutil
import subprocess
import tempfile
import datetime
from pathlib import Path

# ── Config ─────────────────────────────────────────────────────────────────────
# HTTPS remote of the backup repo. main() substitutes {token} with the contents
# of TOKEN_FILE, so the formatted URL is itself sensitive.
REPO_URL_TEMPLATE = "https://{token}@github.com/mziarko/harvey-backup.git"
# File holding the GitHub token; main() reads it and strips whitespace.
TOKEN_FILE = Path.home() / ".openclaw/secrets/github-backup-token.txt"
# Root directory that WORKSPACE_FILES and WORKSPACE_DIRS are resolved against.
WORKSPACE = Path.home() / ".openclaw/workspace"
# Directory containing openclaw.json and the secrets/ folder.
OPENCLAW_DIR = Path.home() / ".openclaw"
# Channel id passed to `openclaw message --channel` by send_discord().
DISCORD_CHANNEL = "1490039402871652495"

# Individual files to back up (relative to WORKSPACE).
WORKSPACE_FILES = [
    "SOUL.md", "MEMORY.md", "AGENTS.md", "USER.md", "IDENTITY.md",
    "TOOLS.md", "HEARTBEAT.md", "BOOTSTRAP.md",
]
# Directories (relative to WORKSPACE) that main() walks recursively.
WORKSPACE_DIRS = ["memory", "scripts", "tools", "sops", "docs"]

# ── Secret patterns → placeholder names ────────────────────────────────────────
# Each entry is (regex, replacement). scrub_secrets() applies them in list
# order; the replacement is handed to re.sub as a template string.
SECRET_PATTERNS = [
    # Provider-style API keys and tokens, recognised by their prefix.
    (r'sk-ant-[A-Za-z0-9\-_]{20,}', '[ANTHROPIC_API_KEY]'),
    (r'sk-proj-[A-Za-z0-9\-_]{20,}', '[OPENAI_API_KEY]'),
    (r'sk-[A-Za-z0-9]{20,}', '[OPENAI_API_KEY]'),
    (r'github_pat_[A-Za-z0-9_]{20,}', '[GITHUB_PAT]'),
    (r'ghp_[A-Za-z0-9]{20,}', '[GITHUB_TOKEN]'),
    (r'xoxb-[0-9\-A-Za-z]{20,}', '[SLACK_BOT_TOKEN]'),
    (r'xoxp-[0-9\-A-Za-z]{20,}', '[SLACK_USER_TOKEN]'),
    (r'xapp-[0-9\-A-Za-z]{20,}', '[SLACK_APP_TOKEN]'),
    (r'pit-[a-f0-9\-]{20,}', '[GHL_TOKEN]'),
    # JSON-style "key": "value" pairs whose value is at least 8 characters.
    # Matching is case-sensitive and the key must be double-quoted.
    (r'"password"\s*:\s*"[^"]{8,}"', '"password": "[PASSWORD]"'),
    (r'"secret"\s*:\s*"[^"]{8,}"', '"secret": "[SECRET]"'),
    (r'"token"\s*:\s*"[^"]{8,}"', '"token": "[TOKEN]"'),
    (r'"apiKey"\s*:\s*"[^"]{8,}"', '"apiKey": "[API_KEY]"'),
    (r'"api_key"\s*:\s*"[^"]{8,}"', '"api_key": "[API_KEY]"'),
    # HTTP Authorization-style bearer tokens.
    (r'Bearer [A-Za-z0-9\-_\.]{20,}', 'Bearer [AUTH_TOKEN]'),
    # Tailscale IPs
    # NOTE(review): this matches any dotted quad starting with "100.", which
    # looks broader than the range Tailscale assigns — confirm that the
    # over-matching is intended.
    (r'100\.\d{1,3}\.\d{1,3}\.\d{1,3}', '[TAILSCALE_IP]'),
]

def scrub_secrets(content: str, patterns=None) -> tuple[str, list[str]]:
    """Replace secrets in *content* with placeholders.

    Args:
        content: Text to scrub.
        patterns: Optional iterable of ``(regex, replacement)`` pairs applied
            in order. Defaults to the module-level ``SECRET_PATTERNS`` table.
            The replacement is used as an ``re.sub`` template string.

    Returns:
        ``(scrubbed_content, findings)`` where *findings* holds one
        ``"<placeholder> (found Nx)"`` entry per pattern that matched.
    """
    # `is None` rather than truthiness so an explicit empty table means
    # "scrub nothing" instead of silently falling back to the defaults.
    table = SECRET_PATTERNS if patterns is None else patterns
    findings = []
    for pattern, placeholder in table:
        # subn substitutes and counts in one pass over the text.
        content, count = re.subn(pattern, placeholder, content)
        if count:
            findings.append(f"{placeholder} (found {count}x)")
    return content, findings

def read_and_scrub(path: Path) -> tuple[str, list[str]]:
    """Read *path* as text and return its secret-scrubbed form.

    Returns:
        The ``(content, findings)`` pair produced by ``scrub_secrets``. If
        reading or scrubbing raises, the content is a single
        ``# Error reading file: ...`` line and the findings list is empty.
    """
    try:
        # Undecodable bytes are replaced rather than raising.
        raw_text = path.read_text(errors='replace')
        outcome = scrub_secrets(raw_text)
    except Exception as exc:
        # Best-effort: the caller writes this text in place of the file body.
        outcome = (f"# Error reading file: {exc}\n", [])
    return outcome

def run(cmd, cwd=None, capture=True, env=None):
    """Execute *cmd* through the shell and return the ``CompletedProcess``.

    Args:
        cmd: Shell command line.
        cwd: Working directory for the command, or None for the current one.
        capture: When true, stdout/stderr are captured on the result.
        env: Environment mapping for the child, or None to inherit.

    Output is decoded as text. A non-zero exit status does not raise; callers
    inspect ``returncode`` themselves.
    """
    options = {
        "shell": True,
        "cwd": cwd,
        "capture_output": capture,
        "text": True,
        "env": env,
    }
    return subprocess.run(cmd, **options)

def send_discord(message: str):
    """Send *message* to the backup Discord channel via the ``openclaw`` CLI.

    The message is shell-quoted before being placed on the command line.
    Callers pass raw git stderr, which can contain quotes, ``$`` or backticks
    that would otherwise break the command or be expanded by the shell.

    Delivery is best-effort: the command's exit status is not checked.
    """
    # Function-scope import: shlex is not among the file's top-level imports.
    import shlex

    run(
        f"openclaw message --channel {shlex.quote(DISCORD_CHANNEL)} "
        f"{shlex.quote(message)}"
    )

def _fail(reason: str) -> None:
    """Announce a fatal backup error and terminate the script.

    Prints ``❌ Backup failed: <reason>``, sends the same text to Discord,
    then exits with status 1.

    Raises:
        SystemExit: always.
    """
    msg = f"❌ Backup failed: {reason}"
    print(msg)
    send_discord(msg)
    sys.exit(1)

def _backup_file(src: Path, dest: Path, label: str, tag: str,
                 backed_up: list, all_findings: list) -> None:
    """Write a secret-scrubbed copy of *src* to *dest* and record the result.

    Args:
        src: File to read.
        dest: Destination path inside the cloned repo; parents are created.
        label: Repo-relative name appended to *backed_up*.
        tag: Prefix for each entry appended to *all_findings*.
        backed_up: Running list of backed-up files (mutated in place).
        all_findings: Running list of scrub findings (mutated in place).
    """
    content, findings = read_and_scrub(src)
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(content)
    backed_up.append(label)
    all_findings.extend(f"{tag}: {finding}" for finding in findings)

def main():
    """Run one backup: clone, copy scrubbed files, commit, push, notify.

    Steps:
      1. Load the GitHub token and clone the backup repo into a temp dir.
      2. Copy workspace files/dirs, openclaw.json, cron definitions and skill
         descriptions into the clone, scrubbing secrets from each.
      3. Write a manifest of secret file names (never their contents).
      4. If anything changed, write BACKUP-SUMMARY.md, commit and push.
      5. Report the outcome on stdout and Discord.

    The temp dir is removed on every exit path.

    Raises:
        SystemExit: with status 1 on a missing/empty token or a failed
            clone, commit or push.
    """
    print(f"[backup] Starting Harvey agent backup — {datetime.datetime.now().isoformat()}")

    # ── Load token ─────────────────────────────────────────────────────────────
    if not TOKEN_FILE.exists():
        _fail("github-backup-token.txt not found")

    token = TOKEN_FILE.read_text().strip()
    if not token:
        _fail("GitHub token file is empty")

    repo_url = REPO_URL_TEMPLATE.format(token=token)

    def redacted_stderr(result) -> str:
        # Git may echo the remote URL, which embeds the token. Redact before
        # truncating so a cut-off token fragment cannot slip through.
        return result.stderr.strip().replace(token, "[GITHUB_TOKEN]")[:200]

    # ── Clone repo to temp dir ─────────────────────────────────────────────────
    tmpdir = Path(tempfile.mkdtemp(prefix="harvey-backup-"))
    print(f"[backup] Working in {tmpdir}")

    try:
        result = run(f'git clone "{repo_url}" repo', cwd=tmpdir)
        if result.returncode != 0:
            _fail(f"git clone error — {redacted_stderr(result)}")

        repo_dir = tmpdir / "repo"

        # ── Configure git identity ─────────────────────────────────────────────
        run('git config user.email "harvey@nomorechores.com"', cwd=repo_dir)
        run('git config user.name "Harvey"', cwd=repo_dir)

        all_findings = []
        backed_up = []
        missing = []

        # ── Workspace files ────────────────────────────────────────────────────
        dest_ws = repo_dir / "workspace"
        dest_ws.mkdir(exist_ok=True)

        for fname in WORKSPACE_FILES:
            src = WORKSPACE / fname
            if src.exists():
                _backup_file(src, dest_ws / fname, f"workspace/{fname}",
                             fname, backed_up, all_findings)
            else:
                missing.append(f"workspace/{fname}")

        # ── Workspace directories ──────────────────────────────────────────────
        for dname in WORKSPACE_DIRS:
            src_dir = WORKSPACE / dname
            if not src_dir.exists():
                missing.append(f"workspace/{dname}/")
                continue
            dest_subdir = dest_ws / dname
            dest_subdir.mkdir(exist_ok=True)
            # Sorted so the summary's file list is stable between runs.
            for fpath in sorted(src_dir.rglob("*")):
                if not fpath.is_file():
                    continue
                rel = fpath.relative_to(src_dir)
                _backup_file(fpath, dest_subdir / rel, f"workspace/{dname}/{rel}",
                             f"{dname}/{rel}", backed_up, all_findings)

        # ── openclaw.json (main gateway config) ───────────────────────────────
        config_path = OPENCLAW_DIR / "openclaw.json"
        dest_config = repo_dir / "config"
        dest_config.mkdir(exist_ok=True)

        if config_path.exists():
            # Pass the Path itself: read_and_scrub does the reading. Passing
            # the file's text made it fail and back up an error line instead.
            _backup_file(config_path, dest_config / "openclaw.json",
                         "config/openclaw.json", "openclaw.json",
                         backed_up, all_findings)
        else:
            missing.append("config/openclaw.json")

        # ── Cron job definitions ───────────────────────────────────────────────
        # Falls back to the plain listing when --json fails, so the saved file
        # is not guaranteed to be JSON despite its name.
        cron_result = run("openclaw cron list --json 2>/dev/null || openclaw cron list 2>/dev/null")
        if cron_result.returncode == 0 and cron_result.stdout.strip():
            content, findings = scrub_secrets(cron_result.stdout)
            (dest_config / "cron-jobs.json").write_text(content)
            backed_up.append("config/cron-jobs.json")
            all_findings.extend(f"cron-jobs: {f}" for f in findings)
        else:
            missing.append("config/cron-jobs (openclaw cron list failed)")

        # ── Skills ────────────────────────────────────────────────────────────
        # NOTE(review): this is ~/openclaw/skills (no leading dot), unlike
        # OPENCLAW_DIR (~/.openclaw) — confirm the path is intentional.
        skills_src = Path.home() / "openclaw/skills"
        if skills_src.exists():
            dest_skills = repo_dir / "skills"
            dest_skills.mkdir(exist_ok=True)
            for skill_dir in sorted(skills_src.iterdir()):
                skill_md = skill_dir / "SKILL.md"
                # Only each skill's SKILL.md is backed up.
                if skill_dir.is_dir() and skill_md.exists():
                    _backup_file(skill_md, dest_skills / skill_dir.name / "SKILL.md",
                                 f"skills/{skill_dir.name}/SKILL.md",
                                 f"skills/{skill_dir.name}", backed_up, all_findings)
        else:
            missing.append("skills/")

        # ── Secret files list (names only, not contents) ───────────────────────
        secrets_dir = OPENCLAW_DIR / "secrets"
        if secrets_dir.exists():
            secret_names = sorted(f.name for f in secrets_dir.iterdir() if f.is_file())
            secret_manifest = (
                "# Secrets manifest (filenames only — contents never backed up)\n"
                "# To restore: create ~/.openclaw/secrets/ and populate each file.\n\n"
                + "".join(f"- {name}\n" for name in secret_names)
            )
            (dest_config / "secrets-manifest.txt").write_text(secret_manifest)
            backed_up.append("config/secrets-manifest.txt")

        missing_note = f" ⚠️ {len(missing)} missing" if missing else ""

        # ── Detect changes BEFORE writing the summary ──────────────────────────
        # The summary carries a timestamp, so writing it first would make every
        # run look changed and the "nothing to push" branch unreachable.
        run("git add -A", cwd=repo_dir)
        status = run("git status --porcelain", cwd=repo_dir)
        if not status.stdout.strip():
            msg = f"✅ Harvey backup: no changes since last run — nothing to push{missing_note}"
            print(msg)
            send_discord(msg)
            return

        # ── Write backup summary ───────────────────────────────────────────────
        # Aware local time so %Z prints the real zone abbreviation instead of
        # a hard-coded "EDT".
        now = datetime.datetime.now().astimezone()
        summary_lines = [
            "# Harvey Backup Summary",
            f"Date: {now.strftime('%Y-%m-%d %H:%M:%S %Z')}",
            f"Files backed up: {len(backed_up)}",
            "",
        ]
        if missing:
            summary_lines += ["## Missing files", ""] + [f"- {m}" for m in missing] + [""]
        if all_findings:
            summary_lines += [
                f"## Secrets scrubbed ({len(all_findings)} findings)", "",
            ] + [f"- {f}" for f in all_findings] + [""]
        summary_lines += ["## Files backed up", ""] + [f"- {f}" for f in backed_up]

        (repo_dir / "BACKUP-SUMMARY.md").write_text("\n".join(summary_lines) + "\n")

        # ── Git commit and push ────────────────────────────────────────────────
        run("git add -A", cwd=repo_dir)
        # Re-read status so the count includes the summary file, as before.
        status = run("git status --porcelain", cwd=repo_dir)

        date_str = now.strftime("%Y-%m-%d")
        changed_count = len(status.stdout.strip().splitlines())
        scrubbed_note = f", {len(all_findings)} secrets scrubbed" if all_findings else ""
        commit_msg = f"backup: {date_str} — {changed_count} files changed{scrubbed_note}"
        if missing:
            commit_msg += f", {len(missing)} missing"

        commit_result = run(f'git commit -m "{commit_msg}"', cwd=repo_dir)
        if commit_result.returncode != 0:
            _fail(f"git commit error — {redacted_stderr(commit_result)}")

        push_result = run("git push origin main", cwd=repo_dir)
        if push_result.returncode != 0:
            # Try master branch
            push_result = run("git push origin master", cwd=repo_dir)
        if push_result.returncode != 0:
            _fail(f"git push error — {redacted_stderr(push_result)}")

        # ── Success ────────────────────────────────────────────────────────────
        scrubbed_note = f" ({len(all_findings)} secrets scrubbed)" if all_findings else ""
        msg = f"✅ Harvey backup complete — {changed_count} files pushed to github.com/mziarko/harvey-backup{scrubbed_note}{missing_note}"
        print(msg)
        send_discord(msg)

    finally:
        # Runs on success, early return and sys.exit alike; the clone's git
        # config holds the token-bearing URL, so it must not be left behind.
        shutil.rmtree(tmpdir, ignore_errors=True)

# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
