feat: add OCR functionality and related configurations

2025-11-07 17:27:20 +00:00 · 2025-06-01 22:10:43 +08:00 · 2025-06-01 22:10:43 +08:00 · 0e8ca1c159
commit 0e8ca1c159
parent 23fb2e0fae
64 changed files with 3998 additions and 1669 deletions
--- a/machine-learning/immich_ml/schemas.py
+++ b/machine-learning/immich_ml/schemas.py
@ -23,14 +23,16 @@ class BoundingBox(TypedDict):
 class ModelTask(StrEnum):
    FACIAL_RECOGNITION = "facial-recognition"
    SEARCH = "clip"
+    OCR = "ocr"


 class ModelType(StrEnum):
    DETECTION = "detection"
    RECOGNITION = "recognition"
    TEXTUAL = "textual"
    VISUAL = "visual"
+    OCR = "ocr"


 class ModelFormat(StrEnum):
    ARMNN = "armnn"
@ -42,7 +44,8 @@ class ModelSource(StrEnum):
    INSIGHTFACE = "insightface"
    MCLIP = "mclip"
    OPENCLIP = "openclip"
+    PADDLE = "paddle"


 ModelIdentity = tuple[ModelType, ModelTask]

@ -87,6 +90,13 @@ class DetectedFace(TypedDict):
 FacialRecognitionOutput = list[DetectedFace]


+class OCROutput(TypedDict):
+    # OCR result entry: a `text` string paired with its `confidence` value.
+    # NOTE(review): the scale of `confidence` (e.g. 0-1) is not defined here - confirm.
+    text: str
+    confidence: float
+
+
 class PipelineEntry(TypedDict):
    modelName: str
    options: dict[str, Any]