from __future__ import annotations
import time
import requests
from typing import Dict, Any

def ollama_generate(
    model: str,
    prompt: str,
    host: str = "http://localhost:11434",
    num_ctx: int = 12288,
    temperature: float = 0.1,
    timeout: int = 1800,
    max_retries: int = 3,
    retry_wait_sec: int = 10,
) -> str:
    """Send a non-streaming generate request to an Ollama server, with retries.

    POSTs to ``{host}/api/generate`` with ``"format": "json"`` and returns the
    stripped ``"response"`` field of the reply. Any failure (timeout, HTTP
    error status, undecodable body, empty response) triggers another attempt
    until ``max_retries`` attempts have been made. Progress is reported via
    ``print``.

    Args:
        model: Value sent as the ``"model"`` field of the request.
        prompt: Value sent as the ``"prompt"`` field of the request.
        host: Base URL of the server; trailing slashes are ignored.
        num_ctx: Sent as ``options.num_ctx``.
        temperature: Sent as ``options.temperature``.
        timeout: Passed to ``requests.post`` as ``timeout`` (seconds).
        max_retries: Total number of attempts; must be at least 1.
        retry_wait_sec: Seconds to sleep between failed attempts.

    Returns:
        The non-empty, whitespace-stripped response text.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        TimeoutError: If the final attempt timed out.
        Exception: Otherwise, whatever error the final attempt produced
            (e.g. an HTTP error from ``raise_for_status`` or the
            ``ValueError`` raised for an empty response).
    """
    # Without at least one attempt there is no error to re-raise at the end;
    # fail early with a clear message instead of hitting `raise None`.
    if max_retries < 1:
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    # Strip trailing slashes so "http://host:11434/" does not yield "//api/...".
    url = f"{host.rstrip('/')}/api/generate"
    payload: Dict[str, Any] = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        # Ask the server for JSON-formatted output.
        "format": "json",
        "options": {
            "temperature": temperature,
            "num_ctx": num_ctx,
        },
    }

    last_err: Exception | None = None
    for attempt in range(1, max_retries + 1):
        try:
            print(f"  🔄 LLM call attempt {attempt}/{max_retries}...")
            r = requests.post(url, json=payload, timeout=timeout)
            r.raise_for_status()
            data = r.json()
            response = (data.get("response") or "").strip()

            # An empty reply is treated as a failure so that it gets retried.
            if not response:
                raise ValueError("LLM returned empty response")

            print(f"  ✅ LLM call successful (response length: {len(response)} chars)")
            return response

        except requests.exceptions.Timeout as exc:
            # Surface timeouts as the builtin TimeoutError, keeping the
            # underlying requests exception reachable via __cause__.
            last_err = TimeoutError(f"LLM request timed out after {timeout} seconds")
            last_err.__cause__ = exc
            print(f"  ⏱️ Timeout on attempt {attempt}/{max_retries}")

        except Exception as e:
            # Deliberately broad: every failure mode is retried, and the last
            # one is re-raised below rather than swallowed.
            last_err = e
            print(f"  ⚠️ LLM call failed (attempt {attempt}/{max_retries}): {e}")

        # Only reached after a failed attempt; skip the wait after the last one.
        if attempt < max_retries:
            print(f"  ⏳ Waiting {retry_wait_sec} seconds before retry...")
            time.sleep(retry_wait_sec)

    # Every attempt failed: propagate the most recent error to the caller.
    print(f"  ❌ All {max_retries} attempts failed!")
    raise last_err