immich/machine-learning/immich_ml/models/ocr/schemas.py

29 lines
785 B
Python
Raw Normal View History

2025-06-10 17:34:52 -04:00
from typing import Iterable
import numpy as np
import numpy.typing as npt
from rapidocr.utils.typings import EngineType, LangRec
2025-06-10 17:34:52 -04:00
from typing_extensions import TypedDict
class TextDetectionOutput(TypedDict):
    """Typed dictionary describing the output of text detection.

    Every entry is annotated as a float32 NumPy array; all three keys are
    required.
    """

    # NOTE(review): presumably the image the detector ran on — confirm with the producer.
    image: npt.NDArray[np.float32]
    # NOTE(review): presumably the coordinates of detected text regions — shape not shown here.
    boxes: npt.NDArray[np.float32]
    # NOTE(review): presumably one confidence value per entry in `boxes` — confirm.
    scores: npt.NDArray[np.float32]
2025-06-10 17:34:52 -04:00
class TextRecognitionOutput(TypedDict):
    """Typed dictionary describing the output of text recognition.

    The keys are camelCase and all four are required. `text` is an iterable
    of strings; the other entries are annotated as float32 NumPy arrays.
    """

    # NOTE(review): presumably the box coordinates the text was read from — shape not shown here.
    box: npt.NDArray[np.float32]
    # NOTE(review): presumably the detection confidence for `box` — confirm.
    boxScore: npt.NDArray[np.float32]
    # Recognized strings.
    text: Iterable[str]
    # NOTE(review): presumably the recognition confidence for `text` — confirm.
    textScore: npt.NDArray[np.float32]
2025-06-10 17:34:52 -04:00
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
2025-06-10 17:34:52 -04:00
class OcrOptions(dict):
    """Dictionary of OCR options that also carries RapidOCR's attributes.

    Keyword arguments become ordinary dict entries. In addition, the
    instance exposes `engine_type`, `lang_type`, and `font_path` as plain
    attributes, because RapidOCR reads those via attribute access rather
    than by key.
    """

    def __init__(self, **options: object) -> None:
        """Store `options` as dict entries and set the fixed attributes.

        Args:
            **options: Arbitrary keyword options kept as dict items.
        """
        super().__init__(**options)
        # Set on the instance, not in the mapping: these are not dict keys.
        self.engine_type = EngineType.ONNXRUNTIME
        self.lang_type = LangRec.CH
        self.font_path = None