# pdf_signature_extraction/paddleocr_client.py

#!/usr/bin/env python3
"""
PaddleOCR Client
Connects to remote PaddleOCR server for OCR inference
"""

import requests
import base64
import numpy as np
from typing import List, Dict, Tuple, Optional
from PIL import Image
from io import BytesIO

class PaddleOCRClient:
    """
    Client for a remote PaddleOCR server reached over HTTP.

    Images are sent as base64-encoded PNG data in a JSON body to
    ``<server_url>/ocr``; server liveness is probed via ``<server_url>/health``.
    """

    # Timeout (seconds) for the lightweight /health probe.
    HEALTH_CHECK_TIMEOUT = 5

    def __init__(self, server_url: str = "http://192.168.30.36:5555", timeout: float = 30):
        """
        Initialize PaddleOCR client.

        Args:
            server_url: URL of the PaddleOCR server (a trailing '/' is stripped)
            timeout: Timeout in seconds applied to each OCR request
        """
        self.server_url = server_url.rstrip('/')
        self.timeout = timeout  # seconds

    def health_check(self) -> bool:
        """
        Check if server is healthy.

        Returns:
            True if the server answers HTTP 200 with a JSON body whose
            'status' is 'ok', False otherwise (including on any error).
        """
        try:
            response = requests.get(
                f"{self.server_url}/health",
                timeout=self.HEALTH_CHECK_TIMEOUT
            )
            return response.status_code == 200 and response.json().get('status') == 'ok'
        except Exception as e:
            # Deliberately broad: a health probe reports failure instead of raising.
            print(f"Health check failed: {e}")
            return False

    @staticmethod
    def _encode_image(image: np.ndarray) -> str:
        """Serialize an image array to PNG and return it as base64 text."""
        if image.ndim not in (2, 3):
            raise ValueError(
                f"Expected a 2-D (grayscale) or 3-D (color) image, got shape {image.shape}"
            )

        if image.ndim == 2:  # Grayscale: passed to PIL with its dtype unchanged
            pil_image = Image.fromarray(image)
        else:  # RGB or RGBA: coerced to 8-bit channels
            pil_image = Image.fromarray(image.astype(np.uint8))

        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    @staticmethod
    def _polygon_to_xywh(box) -> Tuple[int, int, int, int]:
        """Return the axis-aligned (x, y, w, h) box enclosing a polygon of [x, y] points."""
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
        x_min, y_min = min(xs), min(ys)
        # Width/height are truncated after subtraction, not per coordinate.
        return (int(x_min), int(y_min), int(max(xs) - x_min), int(max(ys) - y_min))

    def ocr(self, image: np.ndarray) -> List[Dict]:
        """
        Perform OCR on an image.

        Args:
            image: numpy array of the image (RGB format), 2-D or 3-D

        Returns:
            List of detection results, each containing:
                - box: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
                - text: detected text string
                - confidence: confidence score (0-1)
            An empty list is returned when the server reports no results.

        Raises:
            ValueError if the image is neither 2-D nor 3-D
            Exception if the request fails or the server reports an OCR error
        """
        image_base64 = self._encode_image(image)

        # Only transport and decoding problems are translated here; the
        # server-reported failure below is raised outside this try so it is
        # not re-wrapped as "OCR request failed: OCR failed: ...".
        try:
            response = requests.post(
                f"{self.server_url}/ocr",
                json={"image": image_base64},
                timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.Timeout as e:
            raise Exception(f"OCR request timed out after {self.timeout} seconds") from e
        except requests.exceptions.ConnectionError as e:
            raise Exception(f"Could not connect to server at {self.server_url}") from e
        except Exception as e:
            raise Exception(f"OCR request failed: {str(e)}") from e

        if not isinstance(result, dict):
            raise Exception(
                f"OCR request failed: unexpected response type {type(result).__name__}"
            )

        if not result.get('success'):
            error_msg = result.get('error', 'Unknown error')
            raise Exception(f"OCR failed: {error_msg}")

        # A missing or null 'results' field is treated as "nothing detected".
        return result.get('results') or []

    def get_text_boxes(self, image: np.ndarray) -> List[Tuple[int, int, int, int]]:
        """
        Get bounding boxes of all detected text.

        Args:
            image: numpy array of the image

        Returns:
            List of bounding boxes as (x, y, w, h) tuples. Detections whose
            'box' is missing or empty are skipped.

        Raises:
            Same errors as ocr()
        """
        boxes = []

        for result in self.ocr(image):
            box = result.get('box')  # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
            if not box:
                # No polygon means there is no extent to report.
                continue
            boxes.append(self._polygon_to_xywh(box))

        return boxes

    def __repr__(self):
        return f"PaddleOCRClient(server_url='{self.server_url}')"


# Convenience function
def create_ocr_client(server_url: str = "http://192.168.30.36:5555") -> PaddleOCRClient:
    """
    Build a PaddleOCR client and verify that its server answers.

    Args:
        server_url: URL of the PaddleOCR server

    Returns:
        PaddleOCRClient instance whose health check succeeded

    Raises:
        Exception if the health check does not succeed
    """
    ocr_client = PaddleOCRClient(server_url)

    # Happy path first: a healthy server means the client is ready to use.
    if ocr_client.health_check():
        return ocr_client

    unreachable_msg = (
        f"PaddleOCR server at {server_url} is not responding. "
        "Make sure the server is running on the Linux machine."
    )
    raise Exception(unreachable_msg)


if __name__ == "__main__":
    # Manual smoke test: connect to the default server and OCR a blank image.
    print("Testing PaddleOCR client...")

    try:
        client = create_ocr_client()
        print(f"✅ Connected to server: {client.server_url}")

        # All-white 100x100 RGB image used as the OCR input
        blank_page = np.full((100, 100, 3), 255, dtype=np.uint8)

        print("Running test OCR...")
        results = client.ocr(blank_page)
        print(f"✅ OCR test successful! Found {len(results)} text regions")

    except Exception as exc:
        # Report any failure (connection, health check, OCR) on one line.
        print(f"❌ Error: {exc}")