#!/usr/bin/env python3
"""
domain-recon-ads: Company research → marketing brief → Meta ad launch.

Usage:
  python Skills/domain-recon-ads/scripts/recon.py <url> [options]

Options:
  --ads                 Launch Meta ad campaign after research
  --ad-account ID       Override META_AD_ACCOUNT_ID
  --page-id ID          Override META_PAGE_ID
  --budget CENTS        Daily budget in cents (default: 2000 = $20/day)
  --objective TYPE      AWARENESS | TRAFFIC | CONVERSIONS (default: TRAFFIC)
  --skip-landing-page   Don't create a zo.space landing page (accepted, but this script never creates one)
  --output-dir DIR      Override output directory
  --verbose             Print detailed progress
"""

import argparse
import asyncio
import json
import os
import re
import sys
import time
from pathlib import Path
from urllib.parse import urlparse

import aiohttp

# Endpoint that zo_ask() POSTs every prompt to.
API_URL = "https://api.zo.computer/zo/ask"
# Sent as "model_name" in the JSON body of every zo_ask() request.
MODEL = "openai:gpt-5.3-codex"
# Sent verbatim as the "authorization" header. Read once at import time; it is
# the empty string when ZO_CLIENT_IDENTITY_TOKEN is unset, in which case main()
# exits before any research starts.
AUTH_TOKEN = os.environ.get("ZO_CLIENT_IDENTITY_TOKEN", "")
# Size of the asyncio.Semaphore that caps how many zo_ask() calls run at once.
MAX_CONCURRENT = 8


def slug_from_url(url: str) -> str:
    """Derive a filesystem-friendly slug from the hostname of *url*.

    The hostname is lower-cased, every run of characters outside ``a-z0-9``
    is collapsed into a single hyphen, and leading/trailing hyphens are
    trimmed. When ``urlparse`` reports no hostname (for example when the
    scheme is missing), the literal ``"unknown"`` is slugged instead.
    """
    hostname = urlparse(url).hostname
    if not hostname:
        hostname = "unknown"
    hyphenated = re.sub(r"[^a-z0-9]+", "-", hostname.lower())
    return hyphenated.strip("-")


def extract_first_abs_path(text: str) -> str:
    """Return the first image-looking path found in *text*, or ``""``.

    A match starts at a ``/``, continues through non-whitespace characters and
    ends at a ``.png``, ``.jpg``, ``.jpeg`` or ``.webp`` extension. The
    extension is matched case-insensitively so that ``/tmp/AD.PNG`` is found
    too, in line with callers that lower-case the suffix before checking it.

    The match is purely textual: the path is not checked for existence, and
    any ``/`` can start a match (including one inside a URL), so callers must
    validate the result before using it.

    Args:
        text: Free-form text to scan; ``None`` or ``""`` yields ``""``.

    Returns:
        The matched path exactly as it appears in *text*, or ``""``.
    """
    if not text:
        return ""
    match = re.search(r"(/[^\s]+\.(?:png|jpg|jpeg|webp))", text, flags=re.IGNORECASE)
    return match.group(1) if match else ""


async def zo_ask(session: aiohttp.ClientSession, prompt: str, sem: asyncio.Semaphore) -> str:
    """Send *prompt* to the Zo ask endpoint and return the model's text output.

    The request is made while holding *sem*, so the semaphore bounds how many
    requests are in flight at once. This function never raises for request
    failures: every failure is reported in-band as a ``"[ERROR: ...]"`` string
    so that concurrent callers using ``asyncio.gather`` keep their other
    results.

    Args:
        session: Open aiohttp session used for the POST.
        prompt: Text sent as the ``"input"`` field of the JSON body.
        sem: Semaphore limiting concurrent requests.

    Returns:
        The ``"output"`` field of the JSON response as a string (``""`` when
        the field is missing or null), or an ``"[ERROR: ...]"`` string when the
        request fails, the server answers with an HTTP error status, or the
        body is not a JSON object.
    """
    async with sem:
        try:
            async with session.post(
                API_URL,
                headers={
                    "authorization": AUTH_TOKEN,
                    "content-type": "application/json",
                },
                json={"input": prompt, "model_name": MODEL},
                timeout=aiohttp.ClientTimeout(total=300),
            ) as resp:
                if resp.status >= 400:
                    # Surface HTTP failures instead of silently returning ""
                    # when the error body happens to be valid JSON.
                    detail = (await resp.text())[:500]
                    return f"[ERROR: HTTP {resp.status}: {detail}]"
                data = await resp.json()
        except Exception as e:
            # Some exceptions (e.g. timeouts) have an empty message; fall back
            # to the class name so the error string is never just "[ERROR: ]".
            return f"[ERROR: {str(e) or type(e).__name__}]"

    if not isinstance(data, dict):
        return f"[ERROR: unexpected response type {type(data).__name__}]"

    output = data.get("output")
    if output is None:
        return ""
    # Callers pass the result straight to Path.write_text(), which needs str.
    return output if isinstance(output, str) else str(output)


async def generate_ad_image(
    session: aiohttp.ClientSession,
    sem: asyncio.Semaphore,
    url: str,
    slug: str,
    output_dir: Path,
    brief: str,
    ad_copy: str,
    verbose: bool,
) -> str:
    """Ask Zo to generate one square ad image and return the path to it.

    The raw model response is always saved to
    ``output_dir / "ad-image-generation.md"``. The first image path mentioned
    in the response is then checked on disk:

    * if it is a file inside *output_dir*, it is returned as reported;
    * if it is a file somewhere else, it is copied to
      ``output_dir / "ad-image.png"`` and that path is returned (if the copy
      fails, the reported path is returned unchanged);
    * otherwise a warning is printed and ``""`` is returned.

    Args:
        session: Open aiohttp session forwarded to ``zo_ask``.
        sem: Concurrency limiter forwarded to ``zo_ask``.
        url: Company URL the ad is for.
        slug: Domain slug, included in the prompt for context.
        output_dir: Directory the image should end up in.
        brief: Research brief; only the first 3500 characters are sent.
        ad_copy: Ad copy variants; only the first 3000 characters are sent.
        verbose: When true, print where the image was found or copied.

    Returns:
        Path of a usable image file as a string, or ``""`` when none exists.
    """
    image_prompt = f"""Create one compelling 1:1 Meta ad image for {url}.

Context:
- Company URL: {url}
- Domain slug: {slug}
- Brief (truncated): {brief[:3500]}
- Ad copy variants (truncated): {ad_copy[:3000]}

Steps:
1) Choose the strongest ad angle from the provided brief/copy.
2) Write a concise visual concept for a high-converting paid social image.
3) Use generate_image to create a single image with aspect ratio 1:1.
   - Avoid excessive text in the image.
   - If text is included, keep it very short and legible.
4) After generation, copy the image file into this exact directory:
   {output_dir}
   and name it exactly:
   ad-image.png
   (use shell commands if needed)
5) Respond with:
   - IMAGE_PATH: absolute path to ad-image.png
   - PROMPT_USED: the final generation prompt used

Return only those two lines."""

    result = await zo_ask(session, image_prompt, sem)
    (output_dir / "ad-image-generation.md").write_text(result)

    reported = extract_first_abs_path(result)
    source = Path(reported) if reported else None
    if source is None or not source.is_file():
        print("    ! Ad image generation did not return a usable file path; continuing without image")
        return ""

    # The model followed the instructions: the file already lives in output_dir.
    if output_dir.resolve() in source.resolve().parents:
        if verbose:
            print(f"    Ad image generated: {reported}")
        return reported

    # Fallback: the file exists but was left outside output_dir, so bring a
    # copy next to the other artifacts.
    # NOTE(review): the copy is always named ad-image.png, even when the
    # source is a .jpg/.jpeg/.webp file.
    target = output_dir / "ad-image.png"
    try:
        target.write_bytes(source.read_bytes())
    except OSError as exc:
        # Best effort: the original file is still usable, so keep going with it.
        print(f"    ! Could not copy ad image into {output_dir} ({exc}); using {reported}")
        return reported

    if verbose:
        print(f"    Ad image copied to: {target}")
    return str(target)


async def run_research(url: str, slug: str, output_dir: Path, verbose: bool):
    """Research *url* through Zo and write every artifact under *output_dir*.

    The work happens in stages, all sharing one HTTP session and one
    semaphore of size ``MAX_CONCURRENT``:

    1. Three concurrent prompts (site scrape, founder profile, competitor
       analysis), saved to ``raw/scrape.md``, ``founder.md`` and
       ``competitors.md``.
    2. One prompt that synthesizes those results into ``brief.md``.
    3. Two concurrent prompts (ad copy, audience targeting) saved to
       ``ad-copy.md`` and ``audience.md``, followed by ad image generation.

    Returns:
        A 4-tuple ``(brief, ad_copy, audience, image_path)``; ``image_path``
        is ``""`` when no image could be produced.
    """
    limiter = asyncio.Semaphore(MAX_CONCURRENT)
    raw_dir = output_dir / "raw"
    raw_dir.mkdir(parents=True, exist_ok=True)

    async with aiohttp.ClientSession() as session:

        # ---- Stage 1: three independent research prompts, run together ----
        print("  [1/4] Scraping homepage & running searches...")

        scrape_prompt = f"""Scrape and summarize the website at {url}.

Steps:
1. Use read_webpage to fetch {url}
2. Look for links to About, Pricing, Docs, Blog pages — fetch the top 2-3 most informative ones
3. For each page fetched, write a concise summary

Output a structured report with sections:
- **Homepage Summary**: What the product/company does (2-3 sentences)
- **Key Pages Found**: List of pages you fetched with 1-sentence summaries
- **Product/Service**: What they sell or offer
- **Pricing Model**: Free, freemium, paid, enterprise — any pricing info found
- **Tech Signals**: Any technical stack, API, integrations mentioned
- **Target Audience**: Who this is for based on messaging
- **Notable Claims**: Key marketing claims or differentiators

Save all fetched markdown to {raw_dir}/ with descriptive filenames.
Be thorough but concise."""

        founder_prompt = f"""Research the founder(s) and team behind {url}.

Steps:
1. Search the web for the founder/CEO of {urlparse(url).hostname}
2. Search LinkedIn for the founder (use web_research with category=linkedin_profile)
3. Search X/Twitter for the founder (use web_research with category=tweet)
4. Look for About/Team pages on the site itself

Output a structured profile:
- **Name(s)**: Founder/CEO name(s)
- **Role(s)**: Title(s)
- **Background**: Brief professional history
- **Social Profiles**: X handle, LinkedIn URL if found
- **Notable**: Anything interesting — previous companies, expertise, public presence
- **Team Size Signals**: Any indication of company size

If you can't find founder info, say so and note what you did find about the team."""

        competitor_prompt = f"""Research the competitive landscape for the product at {url}.

Steps:
1. First, fetch {url} with read_webpage to understand what the product does
2. Search for competitors and alternatives (try queries like "[product] alternatives", "[product category] tools")
3. Use web_research with category=company for top competitors
4. Look for comparison pages, G2/Capterra listings, or "vs" content

Output a structured analysis:
- **Category**: What market category this product is in
- **Direct Competitors**: 3-5 closest competitors with 1-sentence descriptions
- **Indirect Competitors**: 2-3 adjacent products
- **Differentiation**: How does this product position itself differently?
- **Market Signals**: Growing/mature market? Recent funding in the space? Trends?
- **Pricing Comparison**: If pricing data available for competitors"""

        scrape_result, founder_result, competitor_result = await asyncio.gather(
            zo_ask(session, scrape_prompt, limiter),
            zo_ask(session, founder_prompt, limiter),
            zo_ask(session, competitor_prompt, limiter),
        )

        # Persist each raw answer before it is fed into the synthesis prompt.
        (raw_dir / "scrape.md").write_text(scrape_result)
        (output_dir / "founder.md").write_text(founder_result)
        (output_dir / "competitors.md").write_text(competitor_result)

        if verbose:
            print(f"    Scrape: {len(scrape_result)} chars")
            print(f"    Founder: {len(founder_result)} chars")
            print(f"    Competitors: {len(competitor_result)} chars")

        # ---- Stage 2: fold the three reports into a single brief ----
        print("  [2/4] Synthesizing research brief...")

        brief_prompt = f"""You are writing a comprehensive marketing research brief for {url}.

Here is the raw research:

## HOMEPAGE & PRODUCT ANALYSIS
{scrape_result}

## FOUNDER & TEAM
{founder_result}

## COMPETITIVE LANDSCAPE
{competitor_result}

---

Synthesize this into a polished **Marketing Research Brief** with these sections:

# Marketing Research Brief: [Company Name]
**URL**: {url}
**Date**: {time.strftime("%Y-%m-%d")}

## Executive Summary
2-3 paragraph overview: what the company does, who founded it, where it sits in the market.

## Product Analysis
- Core offering
- Key features
- Pricing model
- Target audience
- Tech stack / integrations

## Founder & Team
- Background
- Relevant experience
- Public presence & credibility

## Market Position
- Category
- Key competitors
- Differentiation
- Market trends

## Strengths & Opportunities
- What they do well
- Gaps in their marketing/product
- Angles for advertising

## Recommended Marketing Angles
- 3-5 specific angles for ad campaigns
- For each: headline concept, target emotion, audience segment

Write in clear, professional prose. Be specific — use actual data from the research, not generic advice."""

        brief = await zo_ask(session, brief_prompt, limiter)
        (output_dir / "brief.md").write_text(brief)
        print(f"    Brief saved ({len(brief)} chars)")

        # ---- Stage 3: ad copy and targeting (together), then the image ----
        print("  [3/4] Generating marketing assets...")

        ad_copy_prompt = f"""Based on this research brief, generate ad copy variants for Meta (Facebook/Instagram) ads.

## BRIEF
{brief[:6000]}

---

Generate **5 ad copy variants**, each targeting a different angle/audience:

For each variant:
1. **Angle**: What emotion/need this targets (1 sentence)
2. **Audience**: Who this is for
3. **Headline**: Max 40 chars
4. **Primary Text**: 125 chars max (shows above the image)
5. **Description**: 30 chars max (shows below headline)
6. **CTA Button**: One of: Learn More, Sign Up, Get Started, Shop Now, Download, Contact Us

Also suggest:
- **Image direction**: What the ad image should convey (for each variant)
- **A/B test ideas**: 2 small variations to test

Format as clean markdown with clear sections for each variant."""

        audience_prompt = f"""Based on this research brief, recommend Meta Ads targeting for {url}.

## BRIEF
{brief[:6000]}

---

Create detailed targeting recommendations:

## Core Audiences (3 segments)
For each:
- **Name**: Descriptive segment name
- **Demographics**: Age range, gender, location
- **Interests**: Meta interest targeting categories (be specific — use actual Meta interest names)
- **Behaviors**: Purchase behavior, device usage, etc.
- **Estimated reach**: Rough guess (narrow/medium/broad)

## Lookalike Audiences
- What seed audience to use (site visitors, purchasers, etc.)
- Recommended percentage (1%, 2%, 5%)
- Geographic targeting

## Retargeting
- Pixel events to track
- Retargeting windows
- Sequential messaging strategy

## Budget Allocation
- Recommended split across segments
- Testing budget vs. scaling budget
- Expected CPM range for this vertical

Be specific to the product and market — not generic advice."""

        ad_copy, audience = await asyncio.gather(
            zo_ask(session, ad_copy_prompt, limiter),
            zo_ask(session, audience_prompt, limiter),
        )

        (output_dir / "ad-copy.md").write_text(ad_copy)
        (output_dir / "audience.md").write_text(audience)

        # The image prompt needs the finished ad copy, so it runs afterwards.
        image_path = await generate_ad_image(
            session=session,
            sem=limiter,
            url=url,
            slug=slug,
            output_dir=output_dir,
            brief=brief,
            ad_copy=ad_copy,
            verbose=verbose,
        )

        if verbose:
            print(f"    Ad copy: {len(ad_copy)} chars")
            print(f"    Audience: {len(audience)} chars")

        if image_path:
            print(f"    Ad image: {image_path}")

        print("  [4/4] Research complete.")
        return brief, ad_copy, audience, image_path


async def launch_meta_ads(
    url: str,
    slug: str,
    output_dir: Path,
    brief: str,
    ad_copy: str,
    audience: str,
    ad_account: str,
    page_id: str,
    budget_cents: int,
    objective: str,
    verbose: bool,
    image_path: str = "",
):
    """Launch a Meta ad campaign using the Marketing API."""
    sem = asyncio.Semaphore(MAX_CONCURRENT)
    access_token = os.environ.get("META_ACCESS_TOKEN", "")
    if not access_token:
        print("  ✗ META_ACCESS_TOKEN not set. Skipping ad launch.")
        print("    See: Skills/domain-recon-ads/references/meta-ads-setup.md")
        return

    if not ad_account:
        print("  ✗ META_AD_ACCOUNT_ID not set. Skipping ad launch.")
        return

    if not page_id:
        print("  ✗ META_PAGE_ID not set. Skipping ad launch.")
        return

    api_base = "https://graph.facebook.com/v21.0"
    campaign_log = {"url": url, "slug": slug, "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ")}

    async with aiohttp.ClientSession() as session:

        # 1. Create Campaign
        print("  [ads 1/3] Creating campaign...")
        objective_map = {
            "AWARENESS": "OUTCOME_AWARENESS",
            "TRAFFIC": "OUTCOME_TRAFFIC",
            "CONVERSIONS": "OUTCOME_SALES",
        }
        meta_objective = objective_map.get(objective, "OUTCOME_TRAFFIC")

        async with session.post(
            f"{api_base}/{ad_account}/campaigns",
            data={
                "name": f"[Zo Recon] {slug} - {time.strftime('%Y%m%d')}",
                "objective": meta_objective,
                "status": "PAUSED",
                "special_ad_categories": "[]",
                "access_token": access_token,
            },
        ) as resp:
            campaign_data = await resp.json()
            if "error" in campaign_data:
                print(f"  ✗ Campaign creation failed: {campaign_data['error'].get('message', campaign_data['error'])}")
                campaign_log["error"] = campaign_data["error"]
                (output_dir / "campaign-log.json").write_text(json.dumps(campaign_log, indent=2))
                return
            campaign_id = campaign_data["id"]
            campaign_log["campaign_id"] = campaign_id
            print(f"    Campaign created: {campaign_id}")

        # 2. Create Ad Set
        print("  [ads 2/3] Creating ad set...")
        daily_budget = budget_cents

        targeting = {
            "geo_locations": {"countries": ["US"]},
            "age_min": 25,
            "age_max": 55,
            "publisher_platforms": ["facebook", "instagram"],
        }

        async with session.post(
            f"{api_base}/{ad_account}/adsets",
            data={
                "name": f"[Zo Recon] {slug} - Primary",
                "campaign_id": campaign_id,
                "daily_budget": str(daily_budget),
                "billing_event": "IMPRESSIONS",
                "optimization_goal": "LINK_CLICKS",
                "bid_strategy": "LOWEST_COST_WITHOUT_CAP",
                "targeting": json.dumps(targeting),
                "status": "PAUSED",
                "start_time": time.strftime("%Y-%m-%dT%H:%M:%S+0000"),
                "access_token": access_token,
            },
        ) as resp:
            adset_data = await resp.json()
            if "error" in adset_data:
                print(f"  ✗ Ad set creation failed: {adset_data['error'].get('message', adset_data['error'])}")
                campaign_log["adset_error"] = adset_data["error"]
            else:
                adset_id = adset_data["id"]
                campaign_log["adset_id"] = adset_id
                print(f"    Ad set created: {adset_id}")

                # 3. Create Ad Creative + Ad
                print("  [ads 3/3] Creating ad creative...")

                # Extract first headline/body from ad copy (simple parse)
                headline = f"Discover {slug.replace('-', ' ').title()}"
                body = f"Learn how {slug.replace('-', ' ')} can help you. Click to explore."
                link = url

                image_hash = ""
                if image_path and Path(image_path).exists():
                    try:
                        form = aiohttp.FormData()
                        form.add_field("access_token", access_token)
                        with open(image_path, "rb") as f:
                            form.add_field("filename", f.read(), filename=Path(image_path).name, content_type="image/png")
                            async with session.post(f"{api_base}/{ad_account}/adimages", data=form) as img_resp:
                                img_data = await img_resp.json()
                        if "error" in img_data:
                            print(f"    ! Image upload failed, using text-only creative: {img_data['error'].get('message', img_data['error'])}")
                            campaign_log["image_upload_error"] = img_data["error"]
                        else:
                            images = img_data.get("images", {})
                            if images:
                                first_key = next(iter(images.keys()))
                                image_hash = images[first_key].get("hash", "")
                                campaign_log["image_hash"] = image_hash
                                campaign_log["image_path"] = image_path
                                if verbose:
                                    print(f"    Image uploaded with hash: {image_hash}")
                    except Exception as e:
                        print(f"    ! Image upload exception, using text-only creative: {e}")
                        campaign_log["image_upload_exception"] = str(e)

                link_data = {
                    "link": link,
                    "message": body,
                    "name": headline,
                    "call_to_action": {
                        "type": "LEARN_MORE",
                        "value": {"link": link},
                    },
                }
                if image_hash:
                    link_data["image_hash"] = image_hash

                async with session.post(
                    f"{api_base}/{ad_account}/adcreatives",
                    data={
                        "name": f"[Zo Recon] {slug} - Creative 1",
                        "object_story_spec": json.dumps({
                            "page_id": page_id,
                            "link_data": link_data,
                        }),
                        "access_token": access_token,
                    },
                ) as resp:
                    creative_data = await resp.json()
                    if "error" in creative_data:
                        print(f"  ✗ Creative creation failed: {creative_data['error'].get('message', creative_data['error'])}")
                        campaign_log["creative_error"] = creative_data["error"]
                    else:
                        creative_id = creative_data["id"]
                        campaign_log["creative_id"] = creative_id
                        print(f"    Creative created: {creative_id}")

                        # Create the Ad
                        async with session.post(
                            f"{api_base}/{ad_account}/ads",
                            data={
                                "name": f"[Zo Recon] {slug} - Ad 1",
                                "adset_id": adset_id,
                                "creative": json.dumps({"creative_id": creative_id}),
                                "status": "PAUSED",
                                "access_token": access_token,
                            },
                        ) as resp:
                            ad_data = await resp.json()
                            if "error" in ad_data:
                                print(f"  ✗ Ad creation failed: {ad_data['error'].get('message', ad_data['error'])}")
                                campaign_log["ad_error"] = ad_data["error"]
                            else:
                                ad_id = ad_data["id"]
                                campaign_log["ad_id"] = ad_id
                                print(f"    Ad created: {ad_id} (PAUSED — review before enabling)")

    (output_dir / "campaign-log.json").write_text(json.dumps(campaign_log, indent=2))
    print(f"  Campaign log saved to {output_dir / 'campaign-log.json'}")


async def main():
    """Parse the command line, run the research phases and optionally launch ads.

    The URL gets an ``https://`` prefix unless it already starts with
    ``http://`` or ``https://``. Output goes to ``--output-dir`` or, by
    default, ``/home/workspace/output/domain-recon-ads/<slug>``. The process
    exits with status 1 when ``ZO_CLIENT_IDENTITY_TOKEN`` is not set, and with
    an argparse usage error when ``--ads`` is combined with a non-positive
    ``--budget``.

    ``--skip-landing-page`` is accepted but not read anywhere in this function.
    """
    parser = argparse.ArgumentParser(description="Domain recon + ads")
    parser.add_argument("url", help="Homepage URL to research")
    parser.add_argument("--ads", action="store_true", help="Launch Meta ad campaign")
    parser.add_argument("--ad-account", default=os.environ.get("META_AD_ACCOUNT_ID", ""), help="Meta ad account ID")
    parser.add_argument("--page-id", default=os.environ.get("META_PAGE_ID", ""), help="Facebook Page ID")
    parser.add_argument("--budget", type=int, default=2000, help="Daily budget in cents (default: 2000)")
    parser.add_argument("--objective", default="TRAFFIC", choices=["AWARENESS", "TRAFFIC", "CONVERSIONS"])
    parser.add_argument("--skip-landing-page", action="store_true")
    parser.add_argument("--output-dir", default=None)
    parser.add_argument("--verbose", action="store_true")
    args = parser.parse_args()

    # Reject a bad budget up front: otherwise Meta only rejects it at the ad
    # set step, after a campaign has already been created.
    if args.ads and args.budget <= 0:
        parser.error("--budget must be a positive number of cents")

    url = args.url.strip()
    # Test for the full scheme prefix: a bare startswith("http") would treat
    # hosts such as "httpbin.org" as already having a scheme.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    slug = slug_from_url(url)
    output_dir = Path(args.output_dir) if args.output_dir else Path(f"/home/workspace/output/domain-recon-ads/{slug}")
    output_dir.mkdir(parents=True, exist_ok=True)

    print("═══ domain-recon-ads ═══")
    print(f"  URL:    {url}")
    print(f"  Slug:   {slug}")
    print(f"  Output: {output_dir}")
    print(f"  Ads:    {'yes' if args.ads else 'no'}")
    print()

    if not AUTH_TOKEN:
        print("ERROR: ZO_CLIENT_IDENTITY_TOKEN not set. Cannot run research.")
        sys.exit(1)

    # Phase 1-2: Research
    print("Phase 1-2: Research & Marketing Assets")
    brief, ad_copy, audience, image_path = await run_research(url, slug, output_dir, args.verbose)

    # Phase 3: Meta Ads
    if args.ads:
        print()
        print("Phase 3: Meta Ads")
        await launch_meta_ads(
            url=url,
            slug=slug,
            output_dir=output_dir,
            brief=brief,
            ad_copy=ad_copy,
            audience=audience,
            ad_account=args.ad_account,
            page_id=args.page_id,
            budget_cents=args.budget,
            objective=args.objective,
            verbose=args.verbose,
            image_path=image_path,
        )

    print()
    print("═══ Done ═══")
    print(f"  Output: {output_dir}")
    print("  Files:")
    # List every produced file relative to the output directory.
    for f in sorted(output_dir.rglob("*")):
        if f.is_file():
            print(f"    {f.relative_to(output_dir)}")


# Script entry point: run the async CLI to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
