mirror of
https://github.com/immich-app/immich
synced 2025-11-07 17:27:20 +00:00
feat(ml): composable ml (#9973)
* modularize model classes
* various fixes
* expose port
* change response
* round coordinates
* simplify preload
* update server
* simplify interface; simplify
* update tests
* composable endpoint
* cleanup; fixes; remove unnecessary interface; support text input, cleanup
* ew camelcase
* update server; server fixes; fix typing
* ml fixes; update locustfile; fixes
* cleaner response
* better repo response
* update tests; formatting and typing; rename
* undo compose change
* linting; fix type; actually fix typing
* stricter typing; fix detection-only response; no need for defaultdict
* update spec file; update api; linting
* update e2e
* unnecessary dimension
* remove commented code
* remove duplicate code
* remove unused imports
* add batch dim
This commit is contained in:
parent
7a46f80ddc
commit
2b1b43a7e4
39 changed files with 982 additions and 999 deletions
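The headline change is a composable inference endpoint: the server describes every model it wants run in a single request, shaped as PipelineRequest = dict[ModelTask, dict[ModelType, PipelineEntry]] (see the schema diff below). A minimal sketch of what such a payload could look like; the model names and option keys are illustrative assumptions, not taken from this commit:

```python
# Hypothetical payload matching the new PipelineRequest shape:
# dict[ModelTask, dict[ModelType, PipelineEntry]].
# Model names and option values are illustrative only.
payload: dict[str, dict[str, dict]] = {
    "clip": {  # ModelTask.SEARCH serializes to "clip"
        "visual": {"modelName": "ViT-B-32__openai", "options": {}},
    },
    "facial-recognition": {
        "detection": {"modelName": "buffalo_l", "options": {"minScore": 0.7}},
        "recognition": {"modelName": "buffalo_l", "options": {}},
    },
}
```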
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Any, Protocol, TypedDict, TypeGuard
+from typing import Any, Literal, Protocol, TypedDict, TypeGuard, TypeVar
 
 import numpy as np
 import numpy.typing as npt
@@ -28,31 +28,87 @@ class BoundingBox(TypedDict):
     y2: int
 
 
-class ModelType(StrEnum):
-    CLIP = "clip"
+class ModelTask(StrEnum):
     FACIAL_RECOGNITION = "facial-recognition"
+    SEARCH = "clip"
 
 
-class ModelRuntime(StrEnum):
-    ONNX = "onnx"
+class ModelType(StrEnum):
+    DETECTION = "detection"
+    RECOGNITION = "recognition"
+    TEXTUAL = "textual"
+    VISUAL = "visual"
+
+
+class ModelFormat(StrEnum):
     ARMNN = "armnn"
+    ONNX = "onnx"
 
 
+class ModelSource(StrEnum):
+    INSIGHTFACE = "insightface"
+    MCLIP = "mclip"
+    OPENCLIP = "openclip"
+
+
+ModelIdentity = tuple[ModelType, ModelTask]
+
+
+class ModelSession(Protocol):
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, npt.NDArray[np.float32]] | dict[str, npt.NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[npt.NDArray[np.float32]]: ...
+
+
 class HasProfiling(Protocol):
     profiling: dict[str, float]
 
 
-class Face(TypedDict):
+class FaceDetectionOutput(TypedDict):
+    boxes: npt.NDArray[np.float32]
+    scores: npt.NDArray[np.float32]
+    landmarks: npt.NDArray[np.float32]
+
+
+class DetectedFace(TypedDict):
     boundingBox: BoundingBox
     embedding: npt.NDArray[np.float32]
-    imageWidth: int
-    imageHeight: int
     score: float
 
 
+FacialRecognitionOutput = list[DetectedFace]
+
+
+class PipelineEntry(TypedDict):
+    modelName: str
+    options: dict[str, Any]
+
+
+PipelineRequest = dict[ModelTask, dict[ModelType, PipelineEntry]]
+
+
+class InferenceEntry(TypedDict):
+    name: str
+    task: ModelTask
+    type: ModelType
+    options: dict[str, Any]
+
+
+InferenceEntries = tuple[list[InferenceEntry], list[InferenceEntry]]
+
+
+InferenceResponse = dict[ModelTask | Literal["imageHeight"] | Literal["imageWidth"], Any]
+
+
 def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
     return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
 
 
 def is_ndarray(obj: Any, dtype: "type[np._DTypeScalar_co]") -> "TypeGuard[npt.NDArray[np._DTypeScalar_co]]":
     return isinstance(obj, np.ndarray) and obj.dtype == dtype
+
+
+T = TypeVar("T")
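For reference, a short usage sketch of the new schema types. It assumes the definitions above are importable as app.schemas (the import path is an assumption), and all concrete values are illustrative:

```python
# A sketch under the assumption that the schema module above is app.schemas;
# the model name and option values are made up for illustration.
import numpy as np

from app.schemas import (
    InferenceEntry,
    ModelTask,
    ModelType,
    has_profiling,
    is_ndarray,
)

# One flattened entry of the kind a PipelineRequest is unpacked into.
entry: InferenceEntry = {
    "name": "buffalo_l",
    "task": ModelTask.FACIAL_RECOGNITION,
    "type": ModelType.DETECTION,
    "options": {"minScore": 0.7},
}

# is_ndarray narrows an untyped value to NDArray[np.float32] for the checker.
embedding = np.zeros(512, dtype=np.float32)
assert is_ndarray(embedding, np.float32)

# has_profiling narrows any session-like object exposing profiling stats.
class FakeSession:
    profiling = {"inference": 12.5}

session = FakeSession()
if has_profiling(session):
    print(session.profiling)
```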