import os
from pypdf import PdfReader
import arabic_reshaper
from bidi.algorithm import get_display

def test_pdf_processing(pdf_files=None) -> bool:
    """Extract the text of each PDF and print a display-ordered preview.

    For every existing file the page count is printed, then each page's
    text is extracted with pypdf, passed through ``arabic_reshaper`` and
    ``get_display``, and summarised on stdout (first 300 characters plus
    the first three non-empty lines).

    Args:
        pdf_files: Optional iterable of PDF paths to check. When omitted,
            the two sample documents the script was written for are used.

    Returns:
        ``True`` when no file raised an error while being processed
        (paths that do not exist are reported and skipped, not counted
        as failures); ``False`` if processing any file raised.
    """
    if pdf_files is None:
        # Default sample documents (developer-machine paths).
        pdf_files = [
            r"e:\projects\metaboard_backend\src\test\raw_file\کارمزد-14030610-V(1.2).pdf",
            r"e:\projects\metaboard_backend\src\test\raw_file\پروپوزال ارائه سیم کارت به نمایندگان - 14030813.pdf",
        ]

    all_ok = True
    for pdf_path in pdf_files:
        if not os.path.exists(pdf_path):
            print(f"File not found: {pdf_path}")
            continue

        print(f"\nTesting PDF processing for: {pdf_path}")
        print("-" * 50)

        # Errors are isolated per file so that one unreadable PDF does not
        # prevent the remaining files from being examined. The header
        # printed above identifies which file an error belongs to.
        try:
            print("Extracting text...")
            reader = PdfReader(pdf_path)

            print(f"Total pages: {len(reader.pages)}")

            for page_num, page in enumerate(reader.pages, 1):
                # Defensive: treat a missing/empty extraction result as
                # empty text so the reshaper always receives a string.
                _print_page_preview(page_num, page.extract_text() or "")
        except Exception as e:
            print(f"Error: {e}")
            all_ok = False

    return all_ok


def _print_page_preview(page_num, text, preview_chars=300):
    """Print a reshaped, bidi-reordered preview and line summary for one page."""
    # Reshape first, then reorder for display (same order as the pipeline
    # this helper was extracted from).
    reshaped_text = arabic_reshaper.reshape(text)
    bidi_text = get_display(reshaped_text)

    print(f"\nPage {page_num} content (first {preview_chars} chars):")
    print(bidi_text[:preview_chars])
    # An empty line is printed when the text was not truncated.
    print("..." if len(bidi_text) > preview_chars else "")

    # Rough structure probe: count and show the leading non-empty lines.
    non_empty_lines = [
        stripped
        for stripped in (line.strip() for line in bidi_text.split('\n'))
        if stripped
    ]

    print(f"\nDetected {len(non_empty_lines)} non-empty lines on page {page_num}")
    if non_empty_lines:
        print("First few lines:")
        for line in non_empty_lines[:3]:
            print(f"- {line}")

if __name__ == "__main__":
    # Script entry point: run the check once and report the overall verdict.
    verdict = "succeeded" if test_pdf_processing() else "failed"
    print(f"\nTest {verdict}")