import asyncio
import base64
import json
import logging
import time
import argparse
import uuid
import sys
import statistics
import os
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
import httpx
from pydantic import BaseModel

# Logging setup: INFO-and-above records are written to stdout as
# "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Named logger used for the benchmark's progress messages.
logger = logging.getLogger("api_benchmarker")

# Constants
# Default for the --url option: the API root that request paths are appended to.
DEFAULT_BASE_URL = "http://localhost:8001"
# Default for the --key option; the key is sent in the X-API-Key header.
# NOTE(review): this looks like a placeholder value -- pass a real key via --key.
DEFAULT_API_KEY = "your-secret-key-here"
# Characters-per-token ratio used for the rough token estimates
# (len(text) // CHARS_PER_TOKEN) and for sizing the generated context text.
CHARS_PER_TOKEN = 4

# Data Generators
def get_dummy_base64_image():
    """Return a fixed base64 string holding a tiny PNG (its header declares 1x1 pixels).

    Used as the image payload for the OCR benchmark requests.
    """
    tiny_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
    return tiny_png_b64

def get_dummy_base64_audio():
    """Return a fixed base64 string: a RIFF/WAVE header followed by filler.

    The filler makes the payload a bit longer than the bare header so that it
    passes validation (44 bytes header + some data).
    """
    wav_header_b64 = "UklGRigAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQQAAAAAAA"
    filler = "A" * 100
    padding = "=="
    return "".join((wav_header_b64, filler, padding))

def generate_context_text(tokens: int) -> str:
    """Build filler text sized to roughly ``tokens`` tokens.

    The target size is ``tokens * CHARS_PER_TOKEN`` characters. A fixed pangram
    sentence is repeated often enough to reach that size and the result is
    then cut at the target length.
    """
    sentence = "The quick brown fox jumps over the lazy dog. "
    wanted_chars = tokens * CHARS_PER_TOKEN
    # One copy more than the whole-number quotient guarantees enough text to slice.
    whole_copies, _ = divmod(wanted_chars, len(sentence))
    oversized = sentence * (whole_copies + 1)
    return oversized[:wanted_chars]

# Metric Models
class RequestMetric(BaseModel):
    """Outcome and timing of a single benchmark request."""

    # Name of the benchmark suite the request was issued for.
    task_name: str
    # Request path (without the base URL), e.g. "/v1/ocr".
    endpoint: str
    # HTTP status code; 0 when the request raised before a response was available.
    status_code: int
    # Milliseconds from request start until the response stream was opened;
    # equals total_ms when the request failed before that point.
    ttfb_ms: float
    # Milliseconds from request start until the body was fully read (or the
    # request raised).
    total_ms: float
    # True when the status code is in the 2xx range.
    success: bool
    # Estimated input + output tokens (character count // CHARS_PER_TOKEN).
    tokens: int
    # For failures: the first 200 characters of the response body, or the
    # text of the raised exception. None for successful requests.
    error: Optional[str] = None

class BenchStats:
    """Collects the per-request metrics of one benchmark suite and summarises them.

    ``start_time`` and ``end_time`` are ``time.perf_counter()`` readings that
    the caller assigns around the suite run; ``get_summary`` derives the suite
    duration (and therefore the throughput figures) from them.
    """

    def __init__(self, name: str):
        """Create an empty collector for the suite called ``name``."""
        self.name = name
        self.metrics: List["RequestMetric"] = []
        self.start_time = 0.0
        self.end_time = 0.0

    def add(self, m: "RequestMetric"):
        """Record one finished request."""
        self.metrics.append(m)

    @staticmethod
    def _nearest_rank(sorted_values: list, percent: int) -> float:
        """Return the ``percent``-th percentile of ``sorted_values`` (nearest-rank).

        The rank is ``ceil(percent / 100 * n)``, computed with integers so that
        no floating-point rounding can shift the chosen index.
        ``sorted_values`` must be non-empty and sorted ascending.
        """
        rank = (percent * len(sorted_values) + 99) // 100
        return sorted_values[max(rank - 1, 0)]

    def get_summary(self) -> Dict[str, Any]:
        """Aggregate the recorded metrics.

        Returns:
            An empty dict when nothing was recorded; otherwise a dict with the
            keys ``name``, ``total_requests``, ``success_rate`` (percent),
            ``avg_latency``, ``p50_latency``, ``p95_latency``, ``avg_ttfb``
            (all in milliseconds, successful requests only), ``tps``, ``rps``
            and ``duration`` (seconds).
        """
        if not self.metrics:
            return {}

        total = len(self.metrics)
        successes = [m for m in self.metrics if m.success]

        # Latency figures describe successful requests only. When every
        # request failed, a single zero sample keeps mean/median defined.
        total_latencies = [m.total_ms for m in successes] or [0]
        ttfb_latencies = [m.ttfb_ms for m in successes] or [0]

        duration = self.end_time - self.start_time
        total_tokens = sum(m.tokens for m in successes)

        return {
            "name": self.name,
            "total_requests": total,
            "success_rate": (len(successes) / total) * 100,
            "avg_latency": statistics.mean(total_latencies),
            "p50_latency": statistics.median(total_latencies),
            "p95_latency": self._nearest_rank(sorted(total_latencies), 95),
            "avg_ttfb": statistics.mean(ttfb_latencies),
            # Token throughput counts tokens of successful requests only ...
            "tps": total_tokens / duration if duration > 0 else 0,
            # ... while request throughput counts every request, failed or not.
            "rps": total / duration if duration > 0 else 0,
            "duration": duration
        }

# Benchmarking Engine
class ApiBenchmarker:
    """Runs benchmark suites against the API and writes a Markdown report.

    Each suite issues ``iterations`` requests of one task type, bounded by an
    asyncio semaphore, and stores the resulting ``RequestMetric`` samples in
    the ``BenchStats`` entry of ``self.results`` keyed by suite name.
    """

    # Task types accepted by run_task() that share one concurrency limiter:
    # both audio endpoints are throttled by the "tts-asr" semaphore. Without
    # this mapping they would silently fall back to the "completions" limit.
    _SEMAPHORE_ALIASES = {"tts": "tts-asr", "asr": "tts-asr"}

    def __init__(self, base_url: str, api_key: str):
        """Store connection settings and create the default concurrency limits.

        Args:
            base_url: API root; request paths are appended to it.
            api_key: Value sent in the ``X-API-Key`` header.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.headers = {"X-API-Key": api_key}
        # Callers may replace individual entries to change a group's concurrency.
        self.semaphores = {
            "completions": asyncio.Semaphore(5),
            "ocr": asyncio.Semaphore(2),
            "convert": asyncio.Semaphore(2),
            "tts-asr": asyncio.Semaphore(3)
        }
        self.results: Dict[str, "BenchStats"] = {}

    def _semaphore_for(self, task_type: str):
        """Return the limiter for ``task_type`` (the completions one if unknown)."""
        key = self._SEMAPHORE_ALIASES.get(task_type, task_type)
        return self.semaphores.get(key, self.semaphores["completions"])

    def _url_for(self, path: str) -> str:
        """Join the base URL and ``path`` without producing a double slash."""
        return f"{self.base_url.rstrip('/')}{path}"

    @staticmethod
    def _estimate_input_tokens(payload: Any) -> int:
        """Estimate request tokens from the payload's "prefix" and "text" fields."""
        if not isinstance(payload, dict):
            return 0
        input_text = str(payload.get("prefix", "")) + str(payload.get("text", ""))
        return len(input_text) // CHARS_PER_TOKEN

    @staticmethod
    def _estimate_output_tokens(body: bytes) -> int:
        """Estimate response tokens from the first non-empty text field.

        Looks at "content", "text" and "markdown" in that order. This is a
        best-effort figure: a body that is not a JSON object, or whose field
        is not a string, contributes 0 tokens instead of raising.
        """
        try:
            resp_json = json.loads(body)
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
            return 0
        if not isinstance(resp_json, dict):
            return 0
        content = resp_json.get("content", "") or resp_json.get("text", "") or resp_json.get("markdown", "")
        if not isinstance(content, str):
            return 0
        return len(content) // CHARS_PER_TOKEN

    def _build_request(self, task_type: str, name: str) -> Tuple[str, str, Dict[str, Any]]:
        """Return ``(method, path, request kwargs)`` for one request of a suite.

        For completions the context size follows the suite name: 100 tokens by
        default, 1000 when it contains "Normal", 4000 when it contains "Long".
        Unknown task types probe the TTS/ASR status endpoint.
        """
        if task_type == "completions":
            # Stability Test Variation
            prefix_len = 100
            if "Normal" in name:
                prefix_len = 1000
            if "Long" in name:
                prefix_len = 4000
            return "POST", "/v1/completions", {"json": {
                "prefix": generate_context_text(prefix_len),
                "suffix": "End of document.",
                "model_thinking": "low"
            }}
        if task_type == "ocr":
            return "POST", "/v1/ocr", {"json": {
                "image": get_dummy_base64_image(),
                "filename": "bench.png"
            }}
        if task_type == "convert":
            return "POST", "/v1/convert", {"json": {
                "file": base64.b64encode(b"Performance test data").decode(),
                "filename": "bench.txt"
            }}
        if task_type == "tts":
            return "POST", "/v1/tts-asr/tts", {"json": {
                "text": "This is a performance benchmark for the text to speech engine.",
                "voice": "v2/en_speaker_6",
                "format": "wav"
            }}
        if task_type == "asr":
            return "POST", "/v1/tts-asr/asr", {"json": {
                "audio_base64": get_dummy_base64_audio(),
                "language": "en"
            }}
        return "GET", "/v1/tts-asr/status", {}

    async def _execute_request(self, client: "httpx.AsyncClient", name: str, method: str, path: str, **kwargs) -> "RequestMetric":
        """Send one request and measure it.

        Args:
            client: Shared HTTP client.
            name: Suite name recorded on the metric.
            method: HTTP method.
            path: Request path appended to the base URL.
            **kwargs: Passed through to ``client.stream`` (e.g. ``json=...``).

        Returns:
            A ``RequestMetric``. Exceptions raised while sending or reading are
            not propagated; they are reported as a failed metric with status
            code 0.
        """
        url = self._url_for(path)
        # Estimate input + output tokens (mock for output)
        tokens_count = self._estimate_input_tokens(kwargs.get("json"))
        ttfb = 0.0
        start = time.perf_counter()

        try:
            async with client.stream(method, url, **kwargs) as response:
                # The stream context is entered before the body is read, so
                # this marks the time to first byte.
                ttfb = (time.perf_counter() - start) * 1000

                body = await response.aread()
                total_ms = (time.perf_counter() - start) * 1000
                success = 200 <= response.status_code < 300

                error_msg = None
                if success:
                    tokens_count += self._estimate_output_tokens(body)
                else:
                    error_msg = body.decode(errors="ignore")[:200]

                return RequestMetric(
                    task_name=name,
                    endpoint=path,
                    status_code=response.status_code,
                    ttfb_ms=ttfb,
                    total_ms=total_ms,
                    success=success,
                    tokens=tokens_count,
                    error=error_msg
                )
        except Exception as e:
            # Benchmark boundary: any failure becomes a failed sample rather
            # than aborting the whole suite.
            total_ms = (time.perf_counter() - start) * 1000
            return RequestMetric(
                task_name=name,
                endpoint=path,
                status_code=0,
                ttfb_ms=ttfb or total_ms,
                total_ms=total_ms,
                success=False,
                tokens=tokens_count,
                # Some exceptions (e.g. timeouts) carry no message; fall back
                # to the exception type so the report is never blank.
                error=str(e) or type(e).__name__
            )

    async def run_task(self, client: "httpx.AsyncClient", task_type: str, name: str, iterations: int):
        """Run one suite: ``iterations`` requests of ``task_type`` under its limiter.

        Results are appended to ``self.results[name]`` (created on first use)
        and the suite's start/end timestamps are recorded on it.
        """
        if name not in self.results:
            self.results[name] = BenchStats(name)
        stats = self.results[name]

        sem = self._semaphore_for(task_type)
        # The payload is identical for every iteration, so build it once.
        method, path, request_kwargs = self._build_request(task_type, name)

        async def worker():
            async with sem:
                stats.add(await self._execute_request(client, name, method, path, **request_kwargs))

        stats.start_time = time.perf_counter()
        await asyncio.gather(*(worker() for _ in range(iterations)))
        stats.end_time = time.perf_counter()

    def generate_report(self, output_file: str):
        """Write the Markdown report for all recorded suites to ``output_file``.

        Suites without any samples are skipped.
        """
        report = []
        report.append(f"# API Benchmarking Report ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})")
        report.append(f"\n**Base URL:** `{self.base_url}`")

        # Summarise each suite once; both sections below use the same figures.
        summaries = []
        for stats in self.results.values():
            s = stats.get_summary()
            if s:
                summaries.append((stats, s))

        # Summary Table
        report.append("\n## Executive Summary")
        report.append("| Task | Success Rate | Avg TTFB | Avg Latency | P95 Latency | TPS | RPS |")
        report.append("| :--- | :--- | :--- | :--- | :--- | :--- | :--- |")

        for _, s in summaries:
            report.append(f"| {s['name']} | {s['success_rate']:.1f}% | {s['avg_ttfb']:.1f}ms | {s['avg_latency']:.1f}ms | {s['p95_latency']:.1f}ms | {s['tps']:.1f} | {s['rps']:.2f} |")

        # Stability Analysis
        report.append("\n## Stability & Context Analysis")
        report.append("Detailed analysis of how context length affects TTFB and overall performance.")

        # Details per category
        for stats, s in summaries:
            report.append(f"\n### {stats.name} Details")
            report.append(f"- **Total Samples:** {s['total_requests']}")
            report.append(f"- **Duration:** {s['duration']:.2f}s")
            failures = [m for m in stats.metrics if not m.success]
            if failures:
                report.append("- **Top Errors:**")
                # Only the first three failures are listed. Whitespace is
                # collapsed so a multi-line error body cannot break the list.
                for failure in failures[:3]:
                    error_text = " ".join((failure.error or "").split())
                    report.append(f"  - `[{failure.status_code}]` {error_text}")

        with open(output_file, "w", encoding="utf-8") as f:
            f.write("\n".join(report))
        logger.info("Report generated: %s", output_file)

def _positive_int(value: str) -> int:
    """argparse ``type`` callable that accepts only integers >= 1.

    Used for the concurrency options: a semaphore created with 0 would make
    every worker wait forever, and a negative value raises inside asyncio.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be at least 1, got {number}")
    return number


async def main():
    """Parse the command line, run every benchmark suite in order, write the report.

    The concurrency options replace the corresponding entries of
    ``bench.semaphores`` before any suite starts.
    """
    parser = argparse.ArgumentParser(description="Advanced LLM API Benchmarker")
    parser.add_argument("--url", default=DEFAULT_BASE_URL, help="Base URL")
    parser.add_argument("--key", default=DEFAULT_API_KEY, help="API Key")
    parser.add_argument("--c-comp", type=_positive_int, default=5, help="Completion Concurrency")
    parser.add_argument("--c-ocr", type=_positive_int, default=2, help="OCR Concurrency")
    parser.add_argument("--c-audio", type=_positive_int, default=2, help="TTS/ASR Concurrency")
    parser.add_argument("--iters", type=int, default=10, help="Iterations per test suite")
    parser.add_argument("--output", default="api_performance_report.md", help="Output report file")

    args = parser.parse_args()

    bench = ApiBenchmarker(args.url, args.key)
    bench.semaphores["completions"] = asyncio.Semaphore(args.c_comp)
    bench.semaphores["ocr"] = asyncio.Semaphore(args.c_ocr)
    bench.semaphores["tts-asr"] = asyncio.Semaphore(args.c_audio)

    # (log message, task type, suite name) -- executed sequentially in this order.
    suites = (
        # Suite 1: Stability - Completion Contexts
        ("Running Stability Suite (Short Context)...", "completions", "Completion-Short"),
        ("Running Stability Suite (Normal Context)...", "completions", "Completion-Normal"),
        ("Running Stability Suite (Long Context)...", "completions", "Completion-Long"),
        # Suite 2: Functional Concurrency
        ("Running OCR Concurrency Suite...", "ocr", "OCR-Concurrent"),
        ("Running TTS Concurrency Suite...", "tts", "TTS-Concurrent"),
        ("Running ASR Concurrency Suite...", "asr", "ASR-Concurrent"),
        ("Running File Transformation Suite...", "convert", "Convert-Concurrent"),
    )

    async with httpx.AsyncClient(headers=bench.headers, timeout=120.0) as client:
        logger.info("Starting Benchmark Suites...")
        for banner, task_type, suite_name in suites:
            logger.info(banner)
            await bench.run_task(client, task_type, suite_name, args.iters)

    bench.generate_report(args.output)
    print(f"\nBenchmark Complete! View the report at: {args.output}")

# Run the benchmark only when this file is executed as a script (not on import).
if __name__ == "__main__":
    asyncio.run(main())