ocr

ocr

<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Neuro-Optometry OCR Reader</title>
    
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Lexend+Deca&display=swap" rel="stylesheet">

    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js"></script>

    <style>
        body {
            /* Page shell: dark-grey text on an off-white canvas with a uniform 20px gutter. */
            color: #333;
            background-color: #f4f4f9;
            padding: 20px;
            font-family: sans-serif;
        }

        /* Control Panel Styling */
        .controls {
            /* Layout: a wrapping flex row, so groups drop to a new line when space runs out. */
            display: flex;
            flex-wrap: wrap;
            align-items: center;
            gap: 15px;

            /* Appearance: white card with rounded corners and a soft shadow. */
            margin-bottom: 20px;
            padding: 20px;
            border-radius: 8px;
            background: white;
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
        }

        /* One cluster of related controls, separated from the next by a thin divider. */
        .control-group {
            display: flex;
            align-items: center;
            gap: 10px;
            padding-right: 15px;
            border-right: 1px solid #ddd;
        }

        /* The final group has nothing to its right, so it drops the divider. */
        .control-group:last-child { border-right: none; }

        /* Page heading: no top margin, 20px gap before the control panel. */
        h1 {
            font-size: 1.5rem;
            margin: 0 0 20px;
        }

        /* Button Styling */
        button {
            padding: 8px 12px;
            font-size: 14px;
            border: 1px solid #ccc;
            border-radius: 4px;
            background-color: #e9e9ed;
            cursor: pointer;
            /* Fade between the resting, hover and pressed shades below. */
            transition: background 0.2s;
        }

        /* Progressively darker greys give hover and press feedback. */
        button:hover {
            background-color: #d0d0d5;
        }

        button:active {
            background-color: #b0b0b5;
        }

        /* Keep the file picker's text the same size as the buttons. */
        input[type="file"] { font-size: 14px; }

        /* Status Bar */
        #status {
            /* Reserve one line of height so the layout does not jump when the text is empty. */
            min-height: 20px;
            margin-bottom: 10px;
            color: #666;
            font-style: italic;
        }

        /* Output Text Area Styling */
        /*
         * Reading pane that receives the OCR text.
         * The font and background are switched at runtime through the
         * .font-* and .bg-* classes, so this rule must be closed properly
         * for those classes to apply to the container itself.
         */
        #output-container {
            padding: 40px;
            border-radius: 8px;
            border: 1px solid #ccc;
            min-height: 400px;
            white-space: pre-wrap; /* Preserves line breaks from OCR */
            line-height: 1.6;
            transition: all 0.3s ease; /* Animates font-size and background changes */
            box-shadow: 0 2px 5px rgba(0,0,0,0.05);
        }

        /* Font Classes */
        .font-arial {
            font-family: Arial, sans-serif;
        }

        .font-lexend {
            font-family: 'Lexend Deca', sans-serif;
        }

        /* Background Classes (both keep black text) */
        .bg-white {
            color: #000;
            background-color: #fff;
        }

        /* Using a soft lavender for visual stress relief */
        .bg-purple {
            color: #000;
            background-color: #e6e6fa;
        }

    </style>
</head>
<body>

    <h1>Document OCR &amp; Reader</h1>

    <!-- Toolbar: PDF picker plus the display controls for the OCR output. -->
    <div class="controls">
        <!-- File selection and text size -->
        <div class="control-group">
            <!-- A visible label gives the file input an accessible name. -->
            <label for="pdf-upload">PDF file</label>
            <input type="file" id="pdf-upload" accept="application/pdf">
            <button type="button" onclick="changeSize(2)">A+</button>
            <button type="button" onclick="changeSize(-2)">A-</button>
        </div>

        <!-- Typeface of the output text -->
        <div class="control-group">
            <button type="button" onclick="setFont('arial')">Standard font</button>
            <button type="button" onclick="setFont('lexend')">Font Ease</button>
        </div>

        <!-- Background colour of the output area -->
        <div class="control-group">
            <button type="button" onclick="setBg('white')">White Bkg</button>
            <button type="button" onclick="setBg('purple')" style="background-color: #E6E6FA;">Purple Bkg</button>
        </div>
    </div>

    <!-- Live region: progress and error messages written here by the script are announced politely. -->
    <div id="status" role="status">Ready to upload PDF...</div>

    <!-- OCR text is appended here, one child <div> per page. The inline font size matches the script's starting size. -->
    <div id="output-container" class="font-arial bg-white" style="font-size: 18px;">
    </div>

    <script>
        // Tell PDF.js where to fetch its worker script (same 2.16.105 build as the main library).
        pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';

        // Cached handles to the OCR output area and the status line.
        const outputDiv = document.querySelector('#output-container');
        const statusDiv = document.querySelector('#status');

        // Output font size in px; starts at the same value as the inline style on #output-container.
        let currentFontSize = 18;

        // --- Display Controls ---

        /**
         * Grows or shrinks the output text.
         * @param {number} amount Pixels to add to the current size (negative to shrink).
         */
        function changeSize(amount) {
            const MIN_FONT_PX = 10; // floor that stops the text getting too small
            currentFontSize = Math.max(MIN_FONT_PX, currentFontSize + amount);
            outputDiv.style.fontSize = `${currentFontSize}px`;
        }

        /**
         * Switches the typeface of the output area.
         * @param {string} type 'arial' selects the standard font; any other value selects Lexend Deca.
         */
        function setFont(type) {
            // The two font classes are mutually exclusive: drop one, then apply the other.
            const [dropped, applied] = type === 'arial'
                ? ['font-lexend', 'font-arial']
                : ['font-arial', 'font-lexend'];

            outputDiv.classList.remove(dropped);
            outputDiv.classList.add(applied);
        }

        /**
         * Switches the background of the output area.
         * @param {string} color 'white' selects the white background; any other value selects lavender.
         */
        function setBg(color) {
            const wantsWhite = color === 'white';
            const classes = outputDiv.classList;

            // Forced toggles keep exactly one of the two background classes set.
            classes.toggle('bg-purple', !wantsWhite);
            classes.toggle('bg-white', wantsWhite);
        }

        // --- PDF & OCR Logic ---

        /**
         * Runs OCR on a newly selected PDF.
         *
         * Each page is rendered with PDF.js onto an off-screen canvas and
         * recognised by a Tesseract worker. The resulting text is appended to
         * the output container as its own <div>. Progress and errors are
         * reported through the status line.
         */
        document.getElementById('pdf-upload').addEventListener('change', async function(e) {
            const input = e.target;
            const file = input.files[0];
            if (!file) return;

            statusDiv.textContent = "Loading PDF...";
            outputDiv.textContent = ""; // Clear previous text

            // Lock the picker for the duration of the run so two overlapping
            // OCR runs cannot interleave their pages in the output.
            input.disabled = true;

            // Declared outside the try block so the finally clause can release them.
            let pdf = null;
            let worker = null;

            try {
                // Blob.arrayBuffer() rejects when the read fails, so a bad read
                // reaches the catch below instead of being silently dropped.
                const typedarray = new Uint8Array(await file.arrayBuffer());

                pdf = await pdfjsLib.getDocument(typedarray).promise;
                statusDiv.textContent = `PDF Loaded. Processing ${pdf.numPages} page(s)...`;

                // Initialize Tesseract worker
                worker = await Tesseract.createWorker('eng');

                for (let i = 1; i <= pdf.numPages; i++) {
                    statusDiv.textContent = `Processing page ${i} of ${pdf.numPages}...`;

                    // 1. Render PDF page to canvas
                    const page = await pdf.getPage(i);
                    const viewport = page.getViewport({ scale: 2.0 }); // Scale up for better OCR

                    const canvas = document.createElement('canvas');
                    const context = canvas.getContext('2d');
                    canvas.height = viewport.height;
                    canvas.width = viewport.width;

                    await page.render({
                        canvasContext: context,
                        viewport: viewport
                    }).promise;

                    // 2. Perform OCR on the canvas
                    const { data: { text } } = await worker.recognize(canvas);

                    // 3. Append text to output
                    const pageText = document.createElement('div');
                    pageText.style.marginBottom = "20px";
                    pageText.innerText = text;
                    outputDiv.appendChild(pageText);

                    // 4. Release per-page resources before moving on, so long
                    //    documents do not accumulate full-size bitmaps.
                    page.cleanup();
                    canvas.width = 0;
                    canvas.height = 0;
                }

                statusDiv.textContent = "Processing complete.";

            } catch (error) {
                console.error(error);
                statusDiv.textContent = "Error processing PDF. Please try a simpler file.";
            } finally {
                // Always shut down the OCR worker and the PDF document, even when
                // a page failed. Cleanup errors are logged and do not replace the
                // status message the user already sees.
                if (worker) {
                    try {
                        await worker.terminate();
                    } catch (cleanupError) {
                        console.error(cleanupError);
                    }
                }
                if (pdf) {
                    try {
                        await pdf.destroy();
                    } catch (cleanupError) {
                        console.error(cleanupError);
                    }
                }
                input.disabled = false;
            }
        });
    </script>

https://apps.seeingmind.co.uk/ocr.html

Scroll to top