"""Render terminal-style PNG captures and an HTML report from a real local llm-foundry run."""
from __future__ import annotations

import sys
import textwrap
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont

REPO = Path('/home/.z/workspaces/con_2gAuSTkawPiOse8J/llm-foundry')
SRC = REPO / 'src'
OUT = Path('/home/workspace/Deliverables/llm-foundry-proof')
MODEL = 'Qwen/Qwen2.5-0.5B-Instruct'

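# Make the repo's src/ importable before the llm_foundry imports below can resolve.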
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

from llm_foundry.adapters import HuggingFacePipelineBackend
from llm_foundry.benchmarks import BenchmarkCase, BenchmarkSuite
from llm_foundry.memory import CompressionEngine, ObsidianMemoryVault
from llm_foundry.rag import LocalRetriever
from llm_foundry.reasoning import ReflectionEngine


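# Monospace font for the terminal look; assumes the DejaVu font package is installed at this path.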
FONT = '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf'


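# Wrap one transcript line to the capture's column width; blank lines come back as [''].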
def wrap(line: str, width: int = 108) -> list[str]:
    if not line.strip():
        return ['']
    return textwrap.wrap(line, width=width, break_long_words=False, break_on_hyphens=False) or ['']


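# Collapse runs of whitespace and clip to max_chars, marking any cut with an ellipsis.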
def truncate(text: str, max_chars: int) -> str:
    flat = ' '.join(text.split())
    return flat if len(flat) <= max_chars else flat[: max_chars - 1] + '…'


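# Run the local model through the full stack (reflection, compression,
# retrieval, benchmarks) and return the terminal transcript as lines.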
def build_transcript() -> list[str]:
    backend = HuggingFacePipelineBackend(MODEL, max_new_tokens=96)
    reflection = ReflectionEngine(backend)
    vault = ObsidianMemoryVault(OUT / 'memory-vault')
    compressor = CompressionEngine(vault=vault)
    retriever = LocalRetriever(REPO)
    suite = BenchmarkSuite(backend)

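    # 1) Answer the task prompt through the draft -> critique -> final reflection loop.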
    task_prompt = (
        'You are helping ship an assistant. In one tight paragraph, explain how semantic retrieval, '
        'compression, benchmarks, and multi-provider failover help real work. Keep it practical.'
    )
    result = reflection.answer(task_prompt)

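    # 2) Compress a short working transcript against the memory vault to shrink the prompt.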
    transcript = [
        'We need the model to do useful work, not just generate prose.',
        'Semantic retrieval should surface the right context when wording changes.',
        'Compression should cut the clutter before the prompt gets expensive.',
        'Benchmarks should tell us whether it is actually getting better.',
        'Failover should keep the stack alive when one endpoint misbehaves.',
    ]
    context = compressor.compress_transcript(
        task='Write an internal memo about how LLM Foundry helps a real assistant do work',
        transcript=transcript,
        memory_query='semantic retrieval compression benchmarks failover',
        target_tokens=180,
    )

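    # 3) Retrieve the top repo matches for the same query, then score two benchmark cases.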
    hits = retriever.search('semantic retrieval compression benchmarks failover', top_k=3)
    report = suite.run([
        BenchmarkCase(
            name='concise_instruction',
            prompt='Reply with exactly: blue',
            expected_exact='blue',
        ),
        BenchmarkCase(
            name='reasoning_keywords',
            prompt='Explain why a model should verify its own answer in one sentence.',
            expected_contains=('verify', 'answer'),
        ),
    ])

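    # 4) Assemble everything into the lines of a terminal-style transcript.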
    lines: list[str] = []
    lines += [
        '== LLM Foundry local model proof ==',
        f'model={MODEL}',
        'backend=HuggingFacePipelineBackend',
        'goal=make the local model do useful work with the surrounding stack',
        '',
        '$ python -m llm_foundry demo --backend hf --model Qwen/Qwen2.5-0.5B-Instruct --prompt "Explain how semantic retrieval, compression, benchmarks, and failover help a real assistant do work."',
        '',
        'DRAFT:',
        truncate(result.draft, 2200),
        '',
        'CRITIQUE:',
        truncate(result.critique, 1800),
        '',
        'FINAL:',
        truncate(result.final, 1800),
        '',
        '$ python -m llm_foundry compress --task "Write an internal memo about how LLM Foundry helps a real assistant do work" --transcript-file transcript.txt --memory-root memory-vault --memory-query "semantic retrieval compression benchmarks failover"',
        '',
        f'before_tokens={context.token_estimate_before}',
        f'after_tokens={context.token_estimate_after}',
        'compressed_prompt:',
    ]
    lines += context.to_prompt().splitlines()
    lines += [
        '',
        '$ python -m llm_foundry index --root . --query "semantic retrieval compression benchmarks failover" --top-k 3',
        '',
    ]
    if hits:
        for hit in hits:
            lines.append(f'{hit.path} | score={hit.score:.3f} | {truncate(hit.text, 180)}')
    else:
        lines.append('no hits')
    lines += [
        '',
        '$ python -m llm_foundry benchmark --backend hf --model Qwen/Qwen2.5-0.5B-Instruct --case concise_instruction --case reasoning_keywords',
        '',
        f'passed={report.passed}/{report.total}',
        f'pass_rate={report.pass_rate:.2%}',
    ]
    for item in report.results:
        lines.append(
            f'{item.name}: passed={str(item.passed).lower()} exact={str(item.exact_match).lower()} '
            f'keyword_hits={item.keyword_hits} risk={item.delayed_harm_risk:.3f}'
        )
    lines += [
        '',
        '== What this proves ==',
        'The local model is doing real work inside the stack: it drafts an answer, critiques and revises it, feeds memory compression, is paired with semantic retrieval, and is scored by benchmarks.',
        '',
        'GitHub: https://github.com/AmSach/llm-foundry',
        'Instagram: https://www.instagram.com/i.amsach',
        'LinkedIn: https://www.linkedin.com/in/theamansachan',
    ]
    return lines


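# Paint transcript lines into a dark rounded panel styled like a terminal
# capture and save the result as a PNG.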
def render_terminal(lines: list[str], path: Path, title: str) -> None:
    width, height = 1600, 980
    bg = '#07111f'
    panel = '#0f1930'
    border = '#2b426d'
    text_color = '#e7ecff'
    dim = '#8ca0d1'
    accent = '#86efac'
    model_color = '#f9e2af'
    font = ImageFont.truetype(FONT, 24)
    title_font = ImageFont.truetype(FONT, 34)

    img = Image.new('RGB', (width, height), bg)
    d = ImageDraw.Draw(img)
    d.rounded_rectangle((28, 28, width - 28, height - 28), radius=26, fill=panel, outline=border, width=3)
    d.text((56, 52), title, fill=text_color, font=title_font)
    d.text((56, 104), 'Terminal output from a real local run', fill=dim, font=font)

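    # Lay the wrapped lines out top-down; stop drawing once the panel runs out of room.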
    y = 158
    line_h = 34
    for line in lines:
        color = text_color
        if line.startswith('$ '):
            color = accent
        elif line.startswith('=='):
            color = model_color
        elif line.startswith('GitHub:') or line.startswith('Instagram:') or line.startswith('LinkedIn:'):
            color = '#7dd3fc'
        for part in wrap(line, width=108):
            d.text((56, y), part, fill=color, font=font)
            y += line_h
            if y > height - 60:
                break
        if y > height - 60:
            break
    path.parent.mkdir(parents=True, exist_ok=True)
    img.save(path)


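# Save the raw transcript, cut it into three captures, render each as a PNG,
# and write an HTML report that embeds them.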
def main() -> None:
    OUT.mkdir(parents=True, exist_ok=True)
    lines = build_transcript()
    # The transcript contains non-ASCII (ellipses), so pin the encoding.
    (OUT / 'terminal_transcript.txt').write_text('\n'.join(lines), encoding='utf-8')

    # Split into three screenshot-like terminal captures.
    segments = []
    # Each marker opens a new capture: the compress command starts 02 and the
    # benchmark command starts 03; everything before the first match stays in 01.
    # (The old first marker, '== Memory compression ==', never appeared in the
    # transcript, so the splits did not line up with the capture titles.)
    markers = [
        '$ python -m llm_foundry compress',
        '$ python -m llm_foundry benchmark',
    ]
    start = 0
    for marker in markers:
        for idx in range(start, len(lines)):
            if lines[idx].startswith(marker):
                segments.append(lines[start:idx])
                start = idx
                break
    segments.append(lines[start:])

    names = ['01-demo-and-answer.png', '02-compression-and-retrieval.png', '03-benchmark-and-proof.png']
    titles = ['LLM Foundry proof: local model answer', 'LLM Foundry proof: memory + retrieval', 'LLM Foundry proof: benchmark + links']
    for name, title, seg in zip(names, titles, segments):
        render_terminal(seg, OUT / name, title)

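    # Emit a minimal static HTML page that embeds the three captures.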
    report = OUT / 'report.html'
    report.write_text(
        '<!doctype html><html><head><meta charset="utf-8"/>'
        '<meta name="viewport" content="width=device-width, initial-scale=1"/>'
        '<title>LLM Foundry local model proof</title>'
        '<style>body{font-family:system-ui,sans-serif;background:#0b1020;color:#e8ecff;margin:0}'
        '.wrap{max-width:1100px;margin:0 auto;padding:32px}.card{background:#111b36;border:1px solid #2a3b63;border-radius:18px;padding:20px;margin:18px 0}'
        'img{width:100%;border-radius:14px;border:1px solid #2a3b63;margin:10px 0}</style></head><body><div class="wrap">'
        '<div class="card"><h1>LLM Foundry local model proof</h1><p>Real local model run, real terminal output, real screenshots.</p></div>'
        + ''.join(
            f'<div class="card"><h2>{title}</h2><img src="{name}"/></div>'
            for title, name in zip(
                ['Answer generation', 'Memory compression + retrieval', 'Benchmark + proof links'],
                names,
            )
        )
        + '</div></body></html>',
        encoding='utf-8',
    )
    print(OUT)


if __name__ == '__main__':
    main()
