feat(ml): composable ml (#9973)

* modularize model classes

* various fixes

* expose port

* change response

* round coordinates

* simplify preload

* update server

* simplify interface

simplify

* update tests

* composable endpoint

* cleanup

fixes

remove unnecessary interface

support text input, cleanup

* ew camelcase

* update server

server fixes

fix typing

* ml fixes

update locustfile

fixes

* cleaner response

* better repo response

* update tests

formatting and typing

rename

* undo compose change

* linting

fix type

actually fix typing

* stricter typing

fix detection-only response

no need for defaultdict

* update spec file

update api

linting

* update e2e

* unnecessary dimension

* remove commented code

* remove duplicate code

* remove unused imports

* add batch dim
Author: Mert, 2024-06-06 23:09:47 -04:00 (committed by GitHub)
Parent: 7a46f80ddc
Commit: 2b1b43a7e4
39 changed files with 982 additions and 999 deletions


@@ -5,9 +5,9 @@ from aiocache.lock import OptimisticLock
 from aiocache.plugins import TimingPlugin
 
 from app.models import from_model_type
+from app.models.base import InferenceModel
 
-from ..schemas import ModelType, has_profiling
-from .base import InferenceModel
+from ..schemas import ModelTask, ModelType, has_profiling
 
 
 class ModelCache:
@@ -31,28 +31,21 @@ class ModelCache:
         if profiling:
             plugins.append(TimingPlugin())
 
-        self.revalidate_enable = revalidate
+        self.should_revalidate = revalidate
 
         self.cache = SimpleMemoryCache(timeout=timeout, plugins=plugins, namespace=None)
 
-    async def get(self, model_name: str, model_type: ModelType, **model_kwargs: Any) -> InferenceModel:
-        """
-        Args:
-            model_name: Name of model in the model hub used for the task.
-            model_type: Model type or task, which determines which model zoo is used.
-
-        Returns:
-            model: The requested model.
-        """
-
-        key = f"{model_name}{model_type.value}{model_kwargs.get('mode', '')}"
+    async def get(
+        self, model_name: str, model_type: ModelType, model_task: ModelTask, **model_kwargs: Any
+    ) -> InferenceModel:
+        key = f"{model_name}{model_type}{model_task}"
 
         async with OptimisticLock(self.cache, key) as lock:
             model: InferenceModel | None = await self.cache.get(key)
             if model is None:
-                model = from_model_type(model_type, model_name, **model_kwargs)
+                model = from_model_type(model_name, model_type, model_task, **model_kwargs)
                 await lock.cas(model, ttl=model_kwargs.get("ttl", None))
-            elif self.revalidate_enable:
+            elif self.should_revalidate:
                 await self.revalidate(key, model_kwargs.get("ttl", None))
         return model
 
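Two details of the new get are worth noting: the cache key now composes name, type, and task (dropping .value works if the enums stringify to their values, as with StrEnum), and the OptimisticLock/cas pair ensures concurrent requests for the same key load the model at most once. A self-contained sketch of the same pattern using aiocache — FakeModel and the example model name are stand-ins; only the cache calls mirror the diff:

import asyncio

from aiocache.backends.memory import SimpleMemoryCache
from aiocache.lock import OptimisticLock


class FakeModel:
    """Stand-in for InferenceModel; real loading is expensive."""

    def __init__(self, model_name: str) -> None:
        self.model_name = model_name


async def get_model(
    cache: SimpleMemoryCache, model_name: str, model_type: str, model_task: str
) -> FakeModel:
    # Same pattern as the diff: one cache entry per (name, type, task).
    key = f"{model_name}{model_type}{model_task}"
    async with OptimisticLock(cache, key) as lock:
        model: FakeModel | None = await cache.get(key)
        if model is None:
            # The expensive load runs at most once per key; cas only
            # writes if no one else set the key since the lock was taken.
            model = FakeModel(model_name)
            await lock.cas(model, ttl=300)
    return model


async def main() -> None:
    cache = SimpleMemoryCache()
    m1 = await get_model(cache, "ViT-B-32__openai", "visual", "clip")
    m2 = await get_model(cache, "ViT-B-32__openai", "visual", "clip")
    assert m1 is m2  # second call is a cache hit, no reload


asyncio.run(main())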