#!/usr/bin/env python3
"""
PaddleOCR Client
Connects to remote PaddleOCR server for OCR inference
"""

import base64
from io import BytesIO
from typing import List, Dict, Tuple, Optional

import numpy as np
import requests
from PIL import Image

# Default endpoint shared by PaddleOCRClient and create_ocr_client so the two
# defaults cannot drift apart.
DEFAULT_SERVER_URL = "http://192.168.30.36:5555"
# Timeout (seconds) applied to OCR requests unless overridden per client.
DEFAULT_TIMEOUT = 30
# Timeout (seconds) applied to the health-check request.
HEALTH_CHECK_TIMEOUT = 5


class PaddleOCRClient:
    """Client for remote PaddleOCR server."""

    def __init__(self, server_url: str = DEFAULT_SERVER_URL,
                 timeout: float = DEFAULT_TIMEOUT):
        """
        Initialize PaddleOCR client.

        Args:
            server_url: URL of the PaddleOCR server (trailing slashes are
                stripped so endpoint paths can be appended directly)
            timeout: timeout in seconds for OCR requests
        """
        self.server_url = server_url.rstrip('/')
        self.timeout = timeout  # seconds

    def health_check(self) -> bool:
        """
        Check if server is healthy.

        Sends a GET to ``/health`` and expects HTTP 200 with a JSON body
        whose ``status`` field equals ``'ok'``.

        Returns:
            True if server is healthy, False otherwise (any error is printed
            and reported as unhealthy rather than raised)
        """
        try:
            response = requests.get(
                f"{self.server_url}/health",
                timeout=HEALTH_CHECK_TIMEOUT
            )
            return response.status_code == 200 and response.json().get('status') == 'ok'
        except Exception as e:
            # Best-effort probe: any failure simply means "not healthy".
            print(f"Health check failed: {e}")
            return False

    @staticmethod
    def _encode_image(image: np.ndarray) -> str:
        """Encode a numpy image as a base64 PNG string."""
        if len(image.shape) == 2:
            # Grayscale: passed to PIL with its dtype unchanged
            pil_image = Image.fromarray(image)
        else:
            # RGB or RGBA: cast to uint8 before handing to PIL
            pil_image = Image.fromarray(image.astype(np.uint8))

        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    @staticmethod
    def _polygon_to_bbox(box) -> Tuple[int, int, int, int]:
        """Convert a polygon [[x, y], ...] to an axis-aligned (x, y, w, h)."""
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        return (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))

    def ocr(self, image: np.ndarray) -> List[Dict]:
        """
        Perform OCR on an image.

        The image is PNG-encoded, base64-wrapped and POSTed as JSON to the
        server's ``/ocr`` endpoint.

        Args:
            image: numpy array of the image (RGB format)

        Returns:
            List of detection results, each containing:
                - box: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
                - text: detected text string
                - confidence: confidence score (0-1)

        Raises:
            Exception if OCR fails (timeout, connection failure, HTTP error,
            invalid response, or a failure reported by the server)
        """
        image_base64 = self._encode_image(image)

        # Only the transport and JSON decoding live inside the try block so
        # that the server-reported failure below is not caught and wrapped a
        # second time by the catch-all handler.
        try:
            response = requests.post(
                f"{self.server_url}/ocr",
                json={"image": image_base64},
                timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.Timeout as e:
            raise Exception(f"OCR request timed out after {self.timeout} seconds") from e
        except requests.exceptions.ConnectionError as e:
            raise Exception(f"Could not connect to server at {self.server_url}") from e
        except Exception as e:
            raise Exception(f"OCR request failed: {str(e)}") from e

        if not isinstance(result, dict):
            # A non-object JSON payload has no 'success'/'results' fields.
            raise Exception(
                "OCR request failed: unexpected response payload of type "
                f"{type(result).__name__}"
            )

        if not result.get('success'):
            error_msg = result.get('error', 'Unknown error')
            raise Exception(f"OCR failed: {error_msg}")

        return result.get('results', [])

    def get_text_boxes(self, image: np.ndarray) -> List[Tuple[int, int, int, int]]:
        """
        Get bounding boxes of all detected text.

        Args:
            image: numpy array of the image

        Returns:
            List of bounding boxes as (x, y, w, h) tuples

        Raises:
            Exception if the underlying OCR request fails
        """
        # Each result['box'] is a polygon [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        return [self._polygon_to_bbox(result['box']) for result in self.ocr(image)]

    def __repr__(self):
        return f"PaddleOCRClient(server_url='{self.server_url}')"


# Convenience function
def create_ocr_client(server_url: str = DEFAULT_SERVER_URL) -> PaddleOCRClient:
    """
    Create and test PaddleOCR client.

    Args:
        server_url: URL of the PaddleOCR server

    Returns:
        PaddleOCRClient instance

    Raises:
        Exception if server is not reachable
    """
    client = PaddleOCRClient(server_url)

    if not client.health_check():
        raise Exception(
            f"PaddleOCR server at {server_url} is not responding. "
            "Make sure the server is running on the Linux machine."
        )

    return client


if __name__ == "__main__":
    # Test the client
    print("Testing PaddleOCR client...")

    try:
        client = create_ocr_client()
        print(f"✅ Connected to server: {client.server_url}")

        # Create a test image (blank white 100x100 RGB)
        test_image = np.ones((100, 100, 3), dtype=np.uint8) * 255

        print("Running test OCR...")
        results = client.ocr(test_image)
        print(f"✅ OCR test successful! Found {len(results)} text regions")

    except Exception as e:
        print(f"❌ Error: {e}")