feat: add OCR functionality and related configurations

This commit is contained in:
CoderKang 2025-06-01 22:10:43 +08:00 committed by mertalev
parent 23fb2e0fae
commit 0e8ca1c159
No known key found for this signature in database
GPG key ID: DF6ABC77AAD98C95
64 changed files with 3998 additions and 1669 deletions

View file

@ -0,0 +1,47 @@
import logging
from typing import Any

import numpy as np
from numpy.typing import NDArray
from paddleocr import PaddleOCR
from PIL import Image

from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_cv2
from immich_ml.schemas import OCROutput, ModelTask, ModelType
class PaddleOCRecognizer(InferenceModel):
    """OCR inference model backed by a ``PaddleOCR`` pipeline.

    ``_predict`` runs the pipeline on one image, keeps the recognized text
    fragments whose score is strictly greater than ``min_score``, and returns
    them concatenated together with the mean of their scores.
    """

    depends = []
    identity = (ModelType.OCR, ModelTask.OCR)

    def __init__(self, model_name: str, min_score: float = 0.9, **model_kwargs: Any) -> None:
        """Create the recognizer and load the PaddleOCR pipeline immediately.

        Args:
            model_name: Forwarded to ``InferenceModel.__init__``.
            min_score: Score a fragment must exceed to be kept.
            **model_kwargs: Forwarded to ``InferenceModel.__init__``. A
                ``"minScore"`` entry, if present, overrides ``min_score``.

        Raises:
            Exception: Whatever ``PaddleOCR(...)`` raises while loading.
        """
        # Pop "minScore" so it is consumed here and not forwarded to the base class.
        self.min_score = model_kwargs.pop("minScore", min_score)
        super().__init__(model_name, **model_kwargs)
        # The pipeline is built eagerly, so construction fails fast on a bad install.
        self._load()
        self.loaded = True

    def _load(self) -> None:
        """Instantiate the PaddleOCR pipeline and store it on ``self.model``.

        The three optional pipeline switches (document orientation
        classification, document unwarping, text-line orientation) are all
        passed as ``False``.

        Raises:
            Exception: Re-raised unchanged after being logged.
        """
        try:
            self.model = PaddleOCR(
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=False,
            )
        except Exception as e:
            # Log through the logging framework (with traceback) instead of
            # printing to stdout, then re-raise the original exception as-is.
            logging.getLogger(__name__).exception("Error loading PaddleOCR model: %s", e)
            raise

    def _predict(self, inputs: NDArray[np.uint8] | bytes | Image.Image, **kwargs: Any) -> OCROutput:
        """Run OCR on one image and aggregate the confident fragments.

        Args:
            inputs: The image, passed through ``decode_cv2`` before inference.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An ``OCROutput`` whose ``text`` is the kept fragments joined with
            no separator and whose ``confidence`` is the mean of their scores.
            If no fragment exceeds ``min_score``, ``text`` is ``""`` and
            ``confidence`` is ``0.0``.
        """
        image = decode_cv2(inputs)
        results = self.model.predict(image)

        kept_texts = []
        kept_scores = []
        for result in results:
            # rec_texts and rec_scores are read in lockstep: one score per fragment.
            for text, score in zip(result["rec_texts"], result["rec_scores"]):
                if score > self.min_score:
                    kept_texts.append(text)
                    kept_scores.append(score)

        # Guard the empty case so the mean below never divides by zero.
        if not kept_texts:
            return OCROutput(text="", confidence=0.0)

        # NOTE(review): fragments are joined without any delimiter; confirm this
        # is intended for space-delimited languages.
        return OCROutput(
            text="".join(kept_texts),
            confidence=sum(kept_scores) / len(kept_scores),
        )