#!/usr/bin/env python3
"""
SIMPLE RUNNER - Medical Transcript Analysis
Just run this file after uploading your PDFs
"""

import os
import sys
from pathlib import Path

def check_requirements(pdf_directory="/mnt/user-data/uploads",
                       ollama_url="http://localhost:11434",
                       required_packages=None):
    """Verify that packages, the Ollama server, and input PDFs are available.

    Args:
        pdf_directory: Directory scanned for ``*.pdf`` input files.
        ollama_url: Base URL of the local Ollama server.
        required_packages: Iterable of importable package names to check;
            defaults to the packages this pipeline needs.

    Returns:
        bool: True when every check passed, False otherwise (details are
        printed to stdout).
    """
    print("Checking requirements...\n")

    errors = []

    if required_packages is None:
        required_packages = ('PyPDF2', 'pandas', 'openpyxl', 'requests')

    # Importability check — __import__ raises ImportError for missing packages.
    for package in required_packages:
        try:
            __import__(package)
            print(f"✓ {package} installed")
        except ImportError:
            print(f"✗ {package} NOT installed")
            errors.append(f"Missing package: {package}")

    # Ollama reachability: the /api/tags endpoint lists installed models.
    # `import requests` stays inside the try so a missing requests package
    # is reported as "Ollama not running or not installed" rather than crashing.
    try:
        import requests
        response = requests.get(f"{ollama_url}/api/tags", timeout=2)
        if response.status_code == 200:
            print("✓ Ollama is running")
            models = response.json().get('models', [])
            if models:
                print(f"  Available models: {', '.join([m['name'] for m in models])}")
            else:
                print("  ⚠ No models found. Run: ollama pull llama3.1:8b")
        else:
            print("✗ Ollama is not responding")
            errors.append("Ollama not running")
    except Exception as e:
        print(f"✗ Cannot connect to Ollama: {e}")
        errors.append("Ollama not running or not installed")

    # Input check: the directory must exist and contain at least one PDF.
    if os.path.exists(pdf_directory):
        pdf_files = list(Path(pdf_directory).glob("*.pdf"))
        if pdf_files:
            print(f"✓ Found {len(pdf_files)} PDF files in {pdf_directory}")
        else:
            print(f"⚠ No PDF files found in {pdf_directory}")
            errors.append("No PDF files found")
    else:
        print(f"✗ Directory {pdf_directory} does not exist")
        errors.append("PDF directory not found")

    print()

    if errors:
        print("ERRORS FOUND:")
        for error in errors:
            print(f"  - {error}")
        print("\nPlease fix these issues before running.")
        return False

    print("All requirements met! ✓\n")
    return True


def run_analysis(model_name="llama3.1:8b", pdf_directory="/mnt/user-data/uploads",
                 output_dir="/mnt/user-data/outputs"):
    """Run the complete extraction pipeline for one model.

    Args:
        model_name: Ollama model tag to use (e.g. ``llama3.1:8b``).
        pdf_directory: Directory containing the input PDF transcripts.
        output_dir: Directory where the JSON/Excel/report files are written
            (created if missing).

    Returns:
        str | None: Path of the generated Excel tabulation, or None when the
        analyzer produced no results.
    """
    # Project-local imports kept function-scoped so the requirements check
    # in main() can run even when these are not yet installed.
    from medical_transcript_analyzer import MedicalTranscriptAnalyzer
    from datetime import datetime
    import json

    print("=" * 80)
    print(f"STARTING ANALYSIS WITH {model_name.upper()}")
    print("=" * 80)
    print()

    os.makedirs(output_dir, exist_ok=True)

    analyzer = MedicalTranscriptAnalyzer(model_name=model_name)

    print("Analyzing transcripts...")
    print("(This may take 1-2 minutes per PDF)\n")

    results = analyzer.analyze_all_transcripts(pdf_directory)

    if not results:
        print("\n⚠ No results generated!")
        print("Check if:")
        print("  1. PDFs exist in the directory")
        print("  2. Ollama is running (ollama serve)")
        print("  3. Model is downloaded (ollama pull llama3.1:8b)")
        return None

    # Compute the timestamp and the filesystem-safe model tag once
    # (':' is not safe in filenames on all platforms).
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    safe_model = model_name.replace(':', '_')

    # Raw JSON dump of everything the analyzer extracted.
    json_output = f"{output_dir}/extracted_data_{safe_model}_{timestamp}.json"
    with open(json_output, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✓ Raw JSON saved: {Path(json_output).name}")

    # Excel tabulation — the main deliverable.
    excel_output = f"{output_dir}/ANALYSIS_TABULATION_{safe_model}_{timestamp}.xlsx"
    analyzer.create_tabulated_output(results, excel_output)

    # Human-readable text report.
    report_output = f"{output_dir}/ANALYSIS_REPORT_{safe_model}_{timestamp}.txt"
    analyzer.generate_analysis_report(results, report_output)

    print("\n" + "=" * 80)
    print("ANALYSIS COMPLETE! ✓")
    print("=" * 80)
    print(f"\nOutput files in {output_dir}:")
    print(f"  1. {Path(json_output).name}")
    print(f"  2. {Path(excel_output).name} ← MAIN DELIVERABLE")
    print(f"  3. {Path(report_output).name}")
    print()

    return excel_output


def main():
    """Interactive entry point: verify the setup, prompt for a model and
    input directory, then run the analysis (optionally with both models)."""

    banner = "=" * 80

    print("\n" + banner)
    print("MEDICAL TRANSCRIPT ENTITY EXTRACTION")
    print("Automated Analysis with Local LLM")
    print(banner)
    print()

    # Bail out early with setup guidance when anything is missing.
    if not check_requirements():
        print("\nSetup instructions:")
        for step in (
            "  1. Install packages: pip install PyPDF2 pandas openpyxl requests",
            "  2. Install Ollama: curl -fsSL https://ollama.com/install.sh | sh",
            "  3. Download model: ollama pull llama3.1:8b",
            "  4. Start Ollama: ollama serve",
            "  5. Upload your PDF files",
        ):
            print(step)
        sys.exit(1)

    # Interactive model selection.
    print("Available models:")
    print("  1. llama3.1:8b (Recommended - Meta Llama 3.1)")
    print("  2. gemma:7b (Alternative - Google Gemma)")
    print("  3. Both (compare results)")
    print()

    selection = input("Select option (1/2/3) [default=1]: ").strip() or "1"

    directory = input("\nPDF directory [default=/mnt/user-data/uploads]: ").strip() or "/mnt/user-data/uploads"

    print()

    if selection == "1":
        run_analysis("llama3.1:8b", directory)
    elif selection == "2":
        run_analysis("gemma:7b", directory)
    elif selection == "3":
        # Run the pipeline twice so the two models' outputs can be compared.
        print("Running with BOTH models for comparison...\n")

        print(banner)
        print("ROUND 1: LLAMA 3.1")
        print(banner)
        llama_excel = run_analysis("llama3.1:8b", directory)

        print("\n" + banner)
        print("ROUND 2: GOOGLE GEMMA")
        print(banner)
        gemma_excel = run_analysis("gemma:7b", directory)

        if llama_excel and gemma_excel:
            print("\n" + banner)
            print("COMPARISON COMPLETE!")
            print(banner)
            print("\nCompare these two Excel files:")
            print(f"  1. {Path(llama_excel).name}")
            print(f"  2. {Path(gemma_excel).name}")
            print("\nCheck which model gives more accurate entity extraction!")
    else:
        print("Invalid choice. Exiting.")
        sys.exit(1)

    print("\n✓ All done! Check the output files.\n")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nAnalysis interrupted by user.")
        sys.exit(0)
    except Exception as e:
        print(f"\n\nERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
