<?php

namespace App\Http\Controllers;

use Illuminate\Http\Request;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use App\Services\OcrService;
use App\Jobs\ProcessVoterImageBatch;
use App\Http\Controllers\VoterImageBatchController;

class PdfToPngController extends Controller
{
    /**
     * Upload a PDF and extract its pages as PNG images, organised per booth.
     *
     * Flow:
     *  1. Validate the upload and move it to a temporary file.
     *  2. OCR the first page to obtain the constituency name and booth number;
     *     together they determine the output directory below the configured
     *     "constituency" disk root.
     *  3. Convert every page to "voters N.png" with ImageMagick (no OCR).
     *  4. Look for the map page among the extracted images and rename the
     *     files via renameFilesBasedOnMapIndex().
     *  5. When import=true, hand over to VoterImageBatchController::runImport().
     *
     * Request input:
     *  - pdf_file (required) PDF upload, at most 50 MB
     *  - dpi      (optional) integer render density, 50-1200, default 200
     *  - reset    (optional) boolean; delete an existing booth directory first
     *  - import   (optional) boolean; run the voter import after extraction
     *
     * Note: when the extraction succeeds but the import throws, the response
     * carries success=false with the default (200) HTTP status.
     *
     * @param Request $request
     * @return \Illuminate\Http\JsonResponse
     */
    public function extractPdfToPng(Request $request)
    {
        // Tracked outside the try so the finally block can always remove the temp copy
        $tempPdfPath = null;

        try {
            // Validate request
            $request->validate([
                'pdf_file' => 'required|file|mimes:pdf|max:51200', // Max 50MB
                // dpi ends up in typed (int) helper signatures and in a shell command
                'dpi' => 'nullable|integer|min:50|max:1200',
            ]);

            $pdfFile = $request->file('pdf_file');
            $reset = filter_var($request->input('reset', false), FILTER_VALIDATE_BOOLEAN);
            $import = filter_var($request->input('import', false), FILTER_VALIDATE_BOOLEAN);
            // Default 200 DPI to match automator.png; request input arrives as string/null, so cast
            $dpi = (int) ($request->input('dpi') ?? 200);

            // Save PDF temporarily for analysis
            $tempPdfPath = sys_get_temp_dir() . '/' . Str::uuid() . '.pdf';
            $pdfFile->move(sys_get_temp_dir(), basename($tempPdfPath));

            Log::info('Starting PDF analysis and extraction');

            // Check if ImageMagick/Ghostscript is available
            $convertPath = $this->findConvertCommand();
            if (!$convertPath) {
                return response()->json([
                    'success' => false,
                    'message' => 'ImageMagick not found',
                    'error' => 'ImageMagick (convert command) is required but not installed. Install with: brew install imagemagick'
                ], 500);
            }

            // Extract constituency and booth info from first page
            $firstPageInfo = $this->extractConstituencyAndBoothFromFirstPage($tempPdfPath, $convertPath, $dpi);

            if (!$firstPageInfo['constituency'] || !$firstPageInfo['booth_number']) {
                return response()->json([
                    'success' => false,
                    'message' => 'Failed to extract constituency and booth information',
                    'error' => 'Could not parse constituency name or booth number from PDF first page',
                    'debug' => $firstPageInfo
                ], 400);
            }

            $constituencyName = $firstPageInfo['constituency'];
            $boothNumber = $firstPageInfo['booth_number'];

            Log::info('Extracted metadata from PDF', [
                'constituency' => $constituencyName,
                'booth_number' => $boothNumber
            ]);

            // Construct output directory path
            $constituencyFolder = config('filesystems.disks.constituency.root');
            $constituencyPath = $constituencyFolder . '/' . $constituencyName;
            $outputDir = $constituencyPath . '/' . $boothNumber;

            // Handle reset parameter - delete booth folder if exists and reset=true
            if ($reset && is_dir($outputDir)) {
                $this->deleteDirectory($outputDir);
                Log::info("Reset enabled: Deleted existing booth directory", ['path' => $outputDir]);
            }

            // Create constituency folder if not exists
            if (!is_dir($constituencyPath)) {
                mkdir($constituencyPath, 0755, true);
                Log::info("Created constituency directory: {$constituencyPath}");
            }

            // Create booth folder if not exists
            if (!is_dir($outputDir)) {
                mkdir($outputDir, 0755, true);
                Log::info("Created booth directory: {$outputDir}");
            }

            Log::info('Processing PDF to PNG conversion', [
                'constituency' => $constituencyName,
                'booth_number' => $boothNumber,
                'pdf_path' => $tempPdfPath,
                'output_dir' => $outputDir,
                'dpi' => $dpi,
                'reset' => $reset
            ]);

            // Get PDF page count
            $pageCount = $this->getPdfPageCount($tempPdfPath);
            if ($pageCount === 0) {
                return response()->json([
                    'success' => false,
                    'message' => 'Could not determine PDF page count',
                    'error' => 'The PDF file may be corrupted or empty'
                ], 400);
            }

            Log::info("PDF has {$pageCount} pages");

            // STEP 1: Extract ALL pages as "voters N.png" first (FAST - no OCR)
            Log::info("Step 1: Extracting all pages as PNG (no OCR yet)...");
            $extractedFiles = [];
            $errors = [];

            for ($pageNum = 0; $pageNum < $pageCount; $pageNum++) {
                $pageIndex = $pageNum + 1; // 1-based for display

                try {
                    $tempFileName = "voters {$pageIndex}.png"; // Temporary name
                    $outputFile = $outputDir . '/' . $tempFileName;

                    // Convert specific page to PNG (RGB color mode to match automator.png)
                    $command = sprintf(
                        '%s -density %d %s[%d] -colorspace RGB -type TrueColor -quality 100 -alpha remove -background white %s 2>&1',
                        escapeshellarg($convertPath),
                        $dpi,
                        escapeshellarg($tempPdfPath),
                        $pageNum,
                        escapeshellarg($outputFile)
                    );

                    // exec() appends to an existing array; start clean so a failure
                    // report only contains this page's output
                    $output = [];
                    exec($command, $output, $returnCode);

                    if ($returnCode === 0 && file_exists($outputFile)) {
                        $extractedFiles[] = [
                            'page' => $pageIndex,
                            'temp_name' => $tempFileName,
                            'path' => $outputFile
                        ];
                        Log::info("Extracted page {$pageIndex}");
                    } else {
                        $errors[] = [
                            'page' => $pageIndex,
                            'error' => 'Conversion failed',
                            'output' => implode("\n", $output)
                        ];
                        Log::error("Failed to extract page {$pageIndex}");
                    }

                } catch (\Exception $e) {
                    $errors[] = [
                        'page' => $pageIndex,
                        'error' => $e->getMessage()
                    ];
                }
            }

            if (empty($extractedFiles)) {
                return response()->json([
                    'success' => false,
                    'message' => 'Failed to extract any pages',
                    'errors' => $errors
                ], 500);
            }

            // Report what was actually written, not the nominal page count
            Log::info('Step 1 complete: Extracted ' . count($extractedFiles) . " of {$pageCount} pages");

            // STEP 2: Scan extracted images to find map page (OCR only on first 5 pages)
            Log::info("Step 2: Scanning images to find map page...");
            $mapPageIndex = $this->findMapPageFromImages($outputDir, $extractedFiles);

            // STEP 3: Rename files based on map page index
            Log::info("Step 3: Renaming files based on map page location...");
            $finalFiles = $this->renameFilesBasedOnMapIndex($outputDir, $extractedFiles, $mapPageIndex);

            // Count file types
            $boothInfoCount = 0;
            $voterCount = 0;
            $hasMap = false;

            foreach ($finalFiles as $file) {
                if ($file['type'] === 'booth_info') $boothInfoCount++;
                elseif ($file['type'] === 'voter') $voterCount++;
                elseif ($file['type'] === 'map') $hasMap = true;
            }

            // Execute import if requested
            if ($import) {
                Log::info('Import parameter is true, starting voter import process...');

                try {
                    // Reuse VoterImageBatchController for consistent import logic
                    $batchController = new VoterImageBatchController();
                    $importParams = [
                        'constituency' => $constituencyName,
                        'booth_number' => $boothNumber,
                        'start_index' => 0,
                        'max_images' => null,
                        'use_crops' => true,
                        'keep_crops' => false,
                        'reset_booth_voters' => false // Don't delete existing voters by default
                        // crop_options will use defaults from VoterImageBatchController
                    ];

                    // Call internal method (no encryption needed)
                    $response = $batchController->runImport($importParams);

                    Log::info('Voter import completed via VoterImageBatchController');

                } catch (\Exception $e) {
                    Log::error('Import failed after extraction', [
                        'error' => $e->getMessage(),
                        'trace' => $e->getTraceAsString()
                    ]);

                    // If import fails, show extraction details with error.
                    // Kept as a plain array: it is turned into a JSON response exactly once below.
                    $response = [
                        'success' => false,
                        'message' => 'PDF extracted successfully but import failed',
                        'constituency' => $constituencyName,
                        'booth_number' => $boothNumber,
                        'directory' => $outputDir,
                        'total_pages' => $pageCount,
                        'extracted_pages' => count($finalFiles),
                        'import_error' => $e->getMessage()
                    ];
                }
            } else {
                // Only show extraction details when import is not requested
                $response = [
                    'success' => true,
                    'message' => 'PDF pages extracted and organized successfully',
                    'constituency' => $constituencyName,
                    'booth_number' => $boothNumber,
                    'directory' => $outputDir,
                    'total_pages' => $pageCount,
                    'extracted_pages' => count($finalFiles),
                    'failed_pages' => count($errors),
                    'booth_info_pages' => $boothInfoCount,
                    'map_found' => $hasMap,
                    'voter_pages' => $voterCount,
                    'files' => $finalFiles,
                    'errors' => $errors,
                    'next_step' => 'Use /api/image-import/run endpoint to process the extracted images'
                ];
            }

            // runImport() may already hand back a ready-made response; wrapping a
            // JsonResponse in another one would serialise the response object itself
            if ($response instanceof \Illuminate\Http\JsonResponse) {
                return $response;
            }

            return response()->json($response);

        } catch (\Illuminate\Validation\ValidationException $e) {
            return response()->json([
                'success' => false,
                'message' => 'Validation failed',
                'errors' => $e->errors()
            ], 422);

        } catch (\Exception $e) {
            Log::error('PDF to PNG conversion failed', [
                'error' => $e->getMessage(),
                'trace' => $e->getTraceAsString()
            ]);

            return response()->json([
                'success' => false,
                'message' => 'PDF conversion failed',
                'error' => $e->getMessage()
            ], 500);

        } finally {
            // Single cleanup point: runs on success, early return and exception alike
            if ($tempPdfPath !== null && is_file($tempPdfPath)) {
                @unlink($tempPdfPath);
            }
        }
    }
    
    /**
     * Locate the ImageMagick `convert` binary.
     *
     * A fixed list of install locations is probed first; when none of them
     * holds an executable file, the shell's `which` lookup is used instead.
     *
     * @return string|null Path to the binary, or null when it cannot be found
     */
    protected function findConvertCommand(): ?string
    {
        $candidates = [
            '/usr/bin/convert',
            '/usr/local/bin/convert',
            '/opt/homebrew/bin/convert',
        ];

        foreach ($candidates as $candidate) {
            if (!file_exists($candidate) || !is_executable($candidate)) {
                continue;
            }

            return $candidate;
        }

        // Nothing at the fixed locations - fall back to a PATH lookup
        exec('which convert 2>/dev/null', $lookupLines, $exitStatus);

        $located = $exitStatus === 0 && !empty($lookupLines[0]);

        return $located ? trim($lookupLines[0]) : null;
    }
    
    /**
     * Classify every PDF page and assign the file name it should be stored under.
     *
     * Strategy:
     * 1. OCR pages 2 to 5 (at most) and stop at the first one that
     *    isGoogleMapPage() recognises as the map page. Page 1 is never scanned.
     * 2. Map found: pages before it become "booth_info N.png", the map page
     *    becomes "map.png", pages after it become "voters N.png".
     * 3. No map found: page 1 becomes "booth_info 1.png", every other page
     *    becomes "voters N.png".
     *
     * @param string $pdfPath Path to PDF file
     * @param int $pageCount Total number of pages
     * @param string $convertPath Path to convert command
     * @param int $dpi Resolution used when rendering pages for OCR
     * @return array Map of 0-based page index => ['type' => ..., 'name' => ...]
     */
    protected function analyzePageTypes(string $pdfPath, int $pageCount, string $convertPath, int $dpi): array
    {
        Log::info("Scanning pages 2-5 to find Google Map View...");

        // Candidates are the 0-based indexes 1..4, capped by the document length
        $scanLimit = min(5, $pageCount);

        // -1 stands for "no map page located"
        $mapPageIndex = -1;

        $candidate = 1;
        while ($candidate < $scanLimit) {
            $humanPage = $candidate + 1; // 1-based for logging

            Log::info("Checking page {$humanPage}...");
            $ocrText = $this->extractTextFromPdfPage($pdfPath, $candidate, $convertPath, $dpi);

            Log::info("OCR result", [
                'page' => $humanPage,
                'text_length' => strlen($ocrText),
                'preview' => substr($ocrText, 0, 200)
            ]);

            if ($this->isGoogleMapPage($ocrText)) {
                $mapPageIndex = $candidate;
                Log::info("✓ Found Google Map View on page {$humanPage} (index {$candidate}) - STOPPING scan");
                break; // first hit wins; remaining pages are not OCR'd
            }

            $candidate++;
        }

        $mapMissing = $mapPageIndex === -1;
        if ($mapMissing) {
            Log::warning("No Google Map View found. Page 1 = booth_info, rest = voters.");
        }

        // Naming pass - purely positional, no OCR involved
        $pageTypes = [];
        $boothInfoTotal = 0;
        $voterTotal = 0;

        for ($index = 0; $index < $pageCount; $index++) {
            if (!$mapMissing && $index === $mapPageIndex) {
                $pageTypes[$index] = [
                    'type' => 'map',
                    'name' => 'map.png'
                ];
                continue;
            }

            // Without a map only the very first page counts as booth info
            $isBoothInfo = $mapMissing ? $index === 0 : $index < $mapPageIndex;

            if ($isBoothInfo) {
                $boothInfoTotal++;
                $pageTypes[$index] = [
                    'type' => 'booth_info',
                    'name' => "booth_info {$boothInfoTotal}.png"
                ];
            } else {
                $voterTotal++;
                $pageTypes[$index] = [
                    'type' => 'voter',
                    'name' => "voters {$voterTotal}.png"
                ];
            }
        }

        Log::info("✓ File naming complete (OCR stopped after finding map)", [
            'total_pages' => $pageCount,
            'booth_info_pages' => $boothInfoTotal,
            'map_page' => $mapMissing ? 'not found' : ($mapPageIndex + 1),
            'voter_pages' => $voterTotal
        ]);

        return $pageTypes;
    }
    
    /**
     * Extract text from a specific PDF page for analysis.
     *
     * The page is rendered to a temporary PNG with ImageMagick and then run
     * through Tesseract. Every failure (render error, Tesseract missing, no
     * output produced, exception) is logged and results in an empty string.
     * Temporary files are removed on every exit path.
     *
     * @param string $pdfPath Path to PDF
     * @param int $pageNum Page number (0-based)
     * @param string $convertPath Path to convert command
     * @param int $dpi Resolution
     * @return string Extracted text, '' when nothing could be extracted
     */
    protected function extractTextFromPdfPage(string $pdfPath, int $pageNum, string $convertPath, int $dpi): string
    {
        // Tracked outside the try so the finally block can remove whatever was created
        $tempImage = null;
        $textFile = null;

        try {
            // Create temporary image for this page
            $tempImage = sys_get_temp_dir() . '/' . Str::uuid() . '.png';

            // Convert page to PNG (RGB color mode to match automator.png)
            $command = sprintf(
                '%s -density %d %s[%d] -colorspace RGB -type TrueColor -quality 100 -alpha remove -background white %s 2>&1',
                escapeshellarg($convertPath),
                $dpi,
                escapeshellarg($pdfPath),
                $pageNum,
                escapeshellarg($tempImage)
            );

            $convertOutput = [];
            exec($command, $convertOutput, $returnCode);

            if ($returnCode !== 0 || !file_exists($tempImage)) {
                Log::warning("Failed to create temp image for page {$pageNum}");
                return '';
            }

            // Use Tesseract OCR to extract text
            $tesseractPath = $this->findTesseractCommand();
            if (!$tesseractPath) {
                Log::warning("Tesseract not found, skipping text extraction");
                return '';
            }

            // Tesseract appends ".txt" to the output base name it is given
            $outputBase = sys_get_temp_dir() . '/' . Str::uuid();
            $textFile = $outputBase . '.txt';

            $command = sprintf(
                '%s %s %s 2>&1',
                escapeshellarg($tesseractPath),
                escapeshellarg($tempImage),
                escapeshellarg($outputBase)
            );

            $ocrOutput = [];
            exec($command, $ocrOutput, $returnCode);

            if (!is_file($textFile)) {
                return '';
            }

            // file_get_contents() yields false on failure, which the string return type rejects
            $text = file_get_contents($textFile);

            return $text === false ? '' : $text;

        } catch (\Exception $e) {
            Log::error("Failed to extract text from page {$pageNum}: " . $e->getMessage());
            return '';

        } finally {
            // Cleanup - also covers a failed render that left a partial image behind
            foreach ([$tempImage, $textFile] as $leftover) {
                if ($leftover !== null && is_file($leftover)) {
                    @unlink($leftover);
                }
            }
        }
    }
    
    /**
     * Decide whether OCR text looks like it came from the map page.
     *
     * Checks, in order (first hit wins and is logged):
     *  1. fixed tokens in the lower-cased text with all whitespace removed
     *  2. the words "google" and "map" both present anywhere
     *  3. the words "map" and "view" both present anywhere
     *  4. one of several location phrases present
     *
     * @param string $text Extracted text from page
     * @return bool True when any of the checks matches
     */
    protected function isGoogleMapPage(string $text): bool
    {
        if (empty($text)) {
            return false;
        }

        $lowered = strtolower($text);
        // Whitespace-free variant so tokens split up by OCR spacing still match
        $squashed = preg_replace('/\s+/', '', $lowered);

        // Check 1: direct tokens (flexible spacing)
        foreach (['googlemapview', 'googlemap', 'nazrinaksha', 'naksha'] as $token) {
            if (!str_contains($squashed, $token)) {
                continue;
            }

            Log::info("✓ Map page detected", ['pattern' => $token]);
            return true;
        }

        // Checks 2 and 3 both need the word "map"
        $mentionsMap = str_contains($lowered, 'map');

        // Check 2: "google" + "map" anywhere in the text
        if ($mentionsMap && str_contains($lowered, 'google')) {
            Log::info("✓ Map page detected", ['method' => 'google+map']);
            return true;
        }

        // Check 3: "map" + "view" anywhere in the text
        if ($mentionsMap && str_contains($lowered, 'view')) {
            Log::info("✓ Map page detected", ['method' => 'map+view']);
            return true;
        }

        // Check 4: location phrases (polling station maps)
        foreach (['polling station front', 'building front', 'cad view', 'key map'] as $phrase) {
            if (!str_contains($lowered, $phrase)) {
                continue;
            }

            Log::info("✓ Map page detected", ['keyword' => $phrase]);
            return true;
        }

        return false;
    }
    
    /**
     * Locate the Tesseract OCR binary.
     *
     * Probes a fixed list of install locations, then falls back to the
     * shell's `which` lookup.
     *
     * @return string|null Path to the binary, or null when it cannot be found
     */
    protected function findTesseractCommand(): ?string
    {
        $knownLocations = [
            '/usr/bin/tesseract',
            '/usr/local/bin/tesseract',
            '/opt/homebrew/bin/tesseract',
        ];

        foreach ($knownLocations as $binary) {
            $usable = file_exists($binary) && is_executable($binary);
            if ($usable) {
                return $binary;
            }
        }

        // Not at a fixed location - try the PATH lookup
        exec('which tesseract 2>/dev/null', $whichLines, $status);

        if ($status !== 0 || empty($whichLines[0])) {
            return null;
        }

        return trim($whichLines[0]);
    }
    
    /**
     * Get the PDF page count.
     *
     * Tries ImageMagick's `identify` (looked up next to the `convert` binary)
     * first, then `pdfinfo` when it is on the PATH. When neither yields a
     * number the document is assumed to have a single page.
     *
     * @param string $pdfPath
     * @return int Page count; 1 when it cannot be determined
     */
    protected function getPdfPageCount(string $pdfPath): int
    {
        // Try using identify command
        $convertPath = $this->findConvertCommand();
        if ($convertPath !== null) {
            // Swap the name in the file component only, so a directory that happens
            // to contain "convert" is left untouched
            $identifyPath = dirname($convertPath) . '/' . str_replace('convert', 'identify', basename($convertPath));

            if (is_file($identifyPath)) {
                $command = sprintf('%s -format "%%n\n" %s 2>&1', escapeshellarg($identifyPath), escapeshellarg($pdfPath));

                // Dedicated array: exec() appends to whatever the variable already holds
                $identifyOutput = [];
                exec($command, $identifyOutput, $returnCode);

                if ($returnCode === 0) {
                    // stderr is merged in, so skip any warning lines and take the
                    // first line that starts with a number
                    foreach ($identifyOutput as $line) {
                        if (preg_match('/^\s*(\d+)/', $line, $matches)) {
                            return max(1, (int) $matches[1]);
                        }
                    }
                }
            }
        }

        // Fallback: try pdfinfo if available
        $whichOutput = [];
        exec('which pdfinfo 2>/dev/null', $whichOutput, $returnCode);

        if ($returnCode === 0 && !empty($whichOutput[0])) {
            $pdfinfoPath = trim($whichOutput[0]);

            $pdfinfoOutput = [];
            exec(sprintf('%s %s 2>&1', escapeshellarg($pdfinfoPath), escapeshellarg($pdfPath)), $pdfinfoOutput);

            foreach ($pdfinfoOutput as $line) {
                if (preg_match('/Pages:\s+(\d+)/', $line, $matches)) {
                    return (int) $matches[1];
                }
            }
        }

        // Last resort: assume single page
        return 1;
    }
    
    /**
     * Extract constituency name and booth number from PDF first page.
     *
     * Page 1 is rendered to a temporary PNG and OCR'd through OcrService
     * (commented in this file as Google Vision); when that throws or returns
     * nothing, Tesseract is used as a fallback. The recognised text is then
     * matched against a series of patterns. Fields that cannot be parsed stay
     * null; failures are logged rather than thrown. Temporary files are
     * removed on every exit path.
     *
     * The constituency is always returned in the normalised form
     * "<number> - <NAME>" regardless of the spacing found in the OCR text.
     *
     * @param string $pdfPath Path to PDF file
     * @param string $convertPath Path to ImageMagick convert command
     * @param int $dpi Resolution for OCR
     * @return array ['constituency' => string|null, 'booth_number' => string|null, 'raw_text' => string]
     */
    protected function extractConstituencyAndBoothFromFirstPage(string $pdfPath, string $convertPath, int $dpi): array
    {
        $result = [
            'constituency' => null,
            'booth_number' => null,
            'raw_text' => ''
        ];

        // Tracked outside the try so the finally block can remove whatever was created
        $tempImage = null;
        $textFile = null;

        try {
            // First, extract page 1 as PNG at high resolution (RGB color mode to match automator.png)
            $tempImage = sys_get_temp_dir() . '/' . Str::uuid() . '.png';

            $command = sprintf(
                '%s -density %d %s[0] -colorspace RGB -type TrueColor -quality 100 -alpha remove -background white %s 2>&1',
                escapeshellarg($convertPath),
                $dpi,
                escapeshellarg($pdfPath),
                escapeshellarg($tempImage)
            );

            $convertOutput = [];
            exec($command, $convertOutput, $returnCode);

            if ($returnCode !== 0 || !file_exists($tempImage)) {
                Log::error("Failed to extract first page as image");
                return $result;
            }

            // Try Google Vision API first for better accuracy
            $ocrService = app(OcrService::class);
            $firstPageText = '';

            try {
                $firstPageText = $ocrService->extractText($tempImage);
                Log::info('Google Vision API extracted text', ['length' => strlen($firstPageText)]);
            } catch (\Exception $e) {
                Log::warning('Google Vision API failed, falling back to Tesseract', ['error' => $e->getMessage()]);
            }

            // Fallback to Tesseract if Google Vision fails or returns empty
            if (empty($firstPageText)) {
                Log::info('Attempting Tesseract OCR as fallback');
                $tesseractPath = $this->findTesseractCommand();
                if ($tesseractPath) {
                    // Tesseract appends ".txt" to the output base name it is given
                    $outputBase = sys_get_temp_dir() . '/' . Str::uuid();
                    $textFile = $outputBase . '.txt';

                    $command = sprintf(
                        '%s %s %s 2>&1',
                        escapeshellarg($tesseractPath),
                        escapeshellarg($tempImage),
                        escapeshellarg($outputBase)
                    );

                    $ocrOutput = [];
                    exec($command, $ocrOutput, $returnCode);

                    if (is_file($textFile)) {
                        $contents = file_get_contents($textFile);
                        // Keep the previous (empty) text when the read itself fails
                        if ($contents !== false) {
                            $firstPageText = $contents;
                            Log::info('Tesseract extracted text', ['length' => strlen($firstPageText)]);
                        }
                    }
                }
            }

            $result['raw_text'] = $firstPageText;

            if (empty($firstPageText)) {
                Log::error('No text extracted from first page using any OCR method');
                return $result;
            }

            Log::info('OCR text sample', ['first_500_chars' => substr($firstPageText, 0, 500)]);

            // Parse constituency name - handle multiple formats
            // Format 1: "No. Name and Reservation Status of Assembly Constituency : 1 - MANNADIPET (GEN)"
            // Format 2: "Assembly Constituency : 16 - ORLEAMPETH (GEN)"
            // Number and name are captured separately and re-joined so that OCR spacing
            // (or a line break) around the dash cannot produce differently spelled folder names.
            if (preg_match('/(?:No\.\s+Name\s+and\s+Reservation\s+Status\s+of\s+)?Assembly\s+Constituency\s*:\s*(\d+)\s*-\s*([A-Z]+)/i', $firstPageText, $matches)) {
                $result['constituency'] = $matches[1] . ' - ' . $matches[2];
                Log::info('Extracted constituency (Format 1)', ['name' => $result['constituency']]);
            }
            // Format 3: Look for pattern like "1 - MANNADIPET (GEN)" in the header area
            elseif (preg_match('/\b(\d+)\s*-\s*([A-Z]{4,})\s*\((?:GEN|SC|ST)\)/i', $firstPageText, $matches)) {
                $result['constituency'] = $matches[1] . ' - ' . $matches[2];
                Log::info('Extracted constituency (Format 3)', ['name' => $result['constituency']]);
            } else {
                Log::warning('Could not match constituency pattern in text', [
                    'first_300_chars' => substr($firstPageText, 0, 300)
                ]);
            }

            // Parse booth/part number from PDF - tried in order, first match wins.
            // Two earlier patterns ("Part No : N" and a header-table variant) were removed:
            // everything they matched is already matched by the first two entries below,
            // so they could never be reached.
            $boothPatterns = [
                // "Part No. : 1" or "Part No.: 1" (most common format in header)
                'Part No. pattern' => '/Part\s+No\.?\s*[:\.]?\s*(\d+)/i',
                // "Part" followed by the next number anywhere after it
                'Part vicinity pattern' => '/Part[^\d]*(\d+)/i',
                // "Total Pages XX - Page 1" (footer format)
                'Page number pattern' => '/Total\s+Pages\s+\d+\s*-\s*Page\s+(\d+)/i',
                // Last resort - number on the same line after "(GEN)" / "(SC)" / "(ST)"
                'GEN vicinity pattern' => '/\((?:GEN|SC|ST)\)[^\n]*?(\d+)/i',
            ];

            foreach ($boothPatterns as $label => $pattern) {
                if (preg_match($pattern, $firstPageText, $matches)) {
                    $result['booth_number'] = trim($matches[1]);
                    Log::info("Extracted booth number ({$label})", ['booth' => $result['booth_number']]);
                    break;
                }
            }

            if ($result['booth_number'] === null) {
                Log::warning('Could not match part/booth number pattern in text', [
                    'text_sample' => substr($firstPageText, 0, 500)
                ]);
            }

            return $result;

        } catch (\Exception $e) {
            Log::error('Failed to extract constituency and booth info', [
                'error' => $e->getMessage(),
                'trace' => $e->getTraceAsString()
            ]);
            return $result;

        } finally {
            // Cleanup temp image and OCR output, including on early returns and exceptions
            foreach ([$tempImage, $textFile] as $leftover) {
                if ($leftover !== null && is_file($leftover)) {
                    @unlink($leftover);
                }
            }
        }
    }
    
    /**
     * Recursively delete a directory and all its contents.
     *
     * Symbolic links found inside the directory are removed as links; the
     * directories they point to are never descended into.
     *
     * @param string $dir Directory path to delete
     * @return bool False when $dir is not a directory or cannot be listed,
     *              otherwise the result of removing $dir itself
     */
    protected function deleteDirectory(string $dir): bool
    {
        if (!is_dir($dir)) {
            return false;
        }

        $entries = scandir($dir);
        if ($entries === false) {
            // Directory could not be read (e.g. permissions) - nothing safe to do
            return false;
        }

        foreach (array_diff($entries, ['.', '..']) as $item) {
            $path = $dir . '/' . $item;

            // is_dir() follows symlinks; recursing through one would wipe the link's
            // target, which lives outside the tree being deleted
            if (is_dir($path) && !is_link($path)) {
                $this->deleteDirectory($path);
            } else {
                @unlink($path);
            }
        }

        return rmdir($dir);
    }
    
    /**
     * Find map page by scanning extracted PNG images (OCR on images, not PDF)
     * 
     * @param string $outputDir Directory with extracted images
     * @param array $extractedFiles List of extracted files
     * @return int|null Index of map page (0-based), or null if not found
     */
    protected function findMapPageFromImages(string $outputDir, array $extractedFiles): ?int
    {
        // Text recognition is delegated to the OcrService resolved from the container
        // (the log messages below refer to it as Google Vision). $outputDir is not
        // read by this method; every entry carries its own image path.
        $ocr = app(OcrService::class);

        // At most the first five entries are considered.
        $maxToScan = min(5, count($extractedFiles));

        // Index 0 (the first page) is never scanned.
        for ($i = 1; $i < $maxToScan; ++$i) {
            $entry = $extractedFiles[$i];
            $imagePath = $entry['path'];
            $pageNum = $entry['page'];

            Log::info("Scanning image page {$pageNum} with Google Vision API...");

            try {
                $recognized = $ocr->extractText($imagePath);

                // No usable text: note it and move on to the next page.
                if (empty($recognized)) {
                    Log::warning("No text extracted from page {$pageNum}");
                    continue;
                }

                Log::info("Google Vision extracted from page {$pageNum}", [
                    'length' => strlen($recognized),
                    'preview' => substr($recognized, 0, 300)
                ]);

                // The first page whose text is recognised as a map page wins.
                if ($this->isGoogleMapPage($recognized)) {
                    Log::info("✓✓✓ Found map page at index {$i} (page {$pageNum})");
                    return $i;
                }
            } catch (\Exception $e) {
                // A failure on one page is logged and does not stop the scan.
                Log::error("Failed to scan image page {$pageNum}: " . $e->getMessage());
            }
        }

        Log::warning("No map page found in first {$maxToScan} pages");
        return null;
    }
    
    /**
     * Rename files based on map page index
     * 
     * @param string $outputDir Directory with files
     * @param array $extractedFiles List of extracted files
     * @param int|null $mapPageIndex Index of map page (0-based)
     * @return array Updated file list with correct names and types
     */
    protected function renameFilesBasedOnMapIndex(string $outputDir, array $extractedFiles, ?int $mapPageIndex): array
    {
        $finalFiles = [];

        // Next sequence number for each numbered category; the map page is not numbered.
        $nextNumber = ['booth_info' => 1, 'voter' => 1];

        foreach ($extractedFiles as $index => $file) {
            $oldPath = $file['path'];
            $pageNum = $file['page'];

            // Step 1: classify the entry by its position relative to the map page.
            // Without a map page, only index 0 counts as booth info.
            if ($mapPageIndex === null) {
                $type = ($index === 0) ? 'booth_info' : 'voter';
            } elseif ($index < $mapPageIndex) {
                $type = 'booth_info';
            } elseif ($index === $mapPageIndex) {
                $type = 'map';
            } else {
                $type = 'voter';
            }

            // Step 2: derive the target file name from the category.
            if ($type === 'map') {
                $newName = 'map.png';
            } else {
                // Voter pages use the plural "voters" prefix in the file name.
                $prefix = ($type === 'booth_info') ? 'booth_info' : 'voters';
                $newName = $prefix . ' ' . $nextNumber[$type] . '.png';
                $nextNumber[$type]++;
            }

            $newPath = $outputDir . '/' . $newName;

            // Step 3: move the file on disk when the name changes and the source exists.
            if ($oldPath !== $newPath && file_exists($oldPath)) {
                rename($oldPath, $newPath);
                Log::info("Renamed page {$pageNum}: {$file['temp_name']} → {$newName}");
            }

            // Step 4: record the entry; size is 0 when the target is absent.
            $size = 0;
            if (file_exists($newPath)) {
                $size = filesize($newPath);
            }

            $finalFiles[] = [
                'page' => $pageNum,
                'type' => $type,
                'filename' => $newName,
                'path' => $newPath,
                'size' => $size
            ];
        }

        return $finalFiles;
    }
}
