# immich/machine-learning/immich_ml/models/ocr/paddle.py

import logging
from typing import Any

import numpy as np
from numpy.typing import NDArray
from paddleocr import PaddleOCR
from PIL import Image

from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_cv2
from immich_ml.schemas import OCROutput, ModelTask, ModelType

class PaddleOCRecognizer(InferenceModel):
    """OCR model backed by PaddleOCR.

    Runs the PaddleOCR pipeline on an image, keeps the recognized text fragments
    whose score exceeds ``min_score``, and reports them as a single ``OCROutput``
    (concatenated text plus the mean score of the kept fragments).
    """

    depends = []
    identity = (ModelType.OCR, ModelTask.OCR)

    def __init__(self, model_name: str, min_score: float = 0.9, **model_kwargs: Any) -> None:
        """Set the score threshold and load the PaddleOCR pipeline immediately.

        Args:
            model_name: Forwarded to ``InferenceModel.__init__``.
            min_score: Threshold a fragment's recognition score must exceed to be kept.
            **model_kwargs: Extra options forwarded to ``InferenceModel.__init__``.
                A ``"minScore"`` entry, if present, takes precedence over
                ``min_score`` and is removed before forwarding.

        Raises:
            Exception: Whatever ``_load`` raises when the pipeline cannot be built.
        """
        # Pop rather than get so "minScore" is not passed on to the base class.
        self.min_score = model_kwargs.pop("minScore", min_score)
        super().__init__(model_name, **model_kwargs)
        # The pipeline is built eagerly here instead of waiting for a later load call.
        self._load()
        self.loaded = True

    def _load(self) -> None:
        """Build the PaddleOCR pipeline and store it on ``self.model``.

        The pipeline is created with document-orientation classification, document
        unwarping and text-line orientation all switched off. ``model_name`` is not
        passed to PaddleOCR, so its default models are used.

        Raises:
            Exception: Any error raised by the ``PaddleOCR`` constructor; it is
                logged with its traceback and then re-raised unchanged.
        """
        try:
            self.model = PaddleOCR(
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=False,
            )
        except Exception as e:
            # Log through the logging system (with traceback) instead of stdout,
            # then re-raise with a bare `raise` so the original traceback is kept.
            logging.getLogger(__name__).exception("Error loading PaddleOCR model: %s", e)
            raise

    def _predict(self, inputs: NDArray[np.uint8] | bytes | Image.Image, **kwargs: Any) -> OCROutput:
        """Recognize text in an image.

        Args:
            inputs: The image, in any form accepted by ``decode_cv2``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            An ``OCROutput`` whose ``text`` is the concatenation of every fragment
            scoring strictly above ``self.min_score`` and whose ``confidence`` is the
            mean score of those fragments. If no fragment passes the threshold,
            ``text`` is ``""`` and ``confidence`` is ``0.0``.
        """
        image = decode_cv2(inputs)
        results = self.model.predict(image)

        kept_texts: list[str] = []
        kept_scores: list[float] = []
        for result in results:
            for text, score in zip(result["rec_texts"], result["rec_scores"]):
                if score > self.min_score:
                    kept_texts.append(text)
                    kept_scores.append(score)

        if not kept_texts:
            return OCROutput(text="", confidence=0.0)

        # NOTE(review): fragments are joined with no separator, so text from separate
        # regions runs together — confirm this is what consumers of OCROutput expect.
        return OCROutput(
            text="".join(kept_texts),
            # Coerce to a builtin float so the value has the same type as the 0.0 of
            # the empty case, whatever numeric type the scores arrive in.
            confidence=float(sum(kept_scores) / len(kept_scores)),
        )