feat(ml): model unloading (#2661)

* model cache * fixed revalidation when using cache namespace * fixed ttl not being set, added lock
2025-10-17 18:19:27 +00:00 · 2023-06-06 21:48:51 -04:00 · 2023-06-06 21:48:51 -04:00 · d0cc231782
commit d0cc231782
parent 6ce35d47f5
5 changed files with 400 additions and 233 deletions
--- a/machine-learning/app/main.py
+++ b/machine-learning/app/main.py
@ -1,5 +1,7 @@
 import os
 from typing import Any
+
+from cache import ModelCache
 from schemas import (
    EmbeddingResponse,
    FaceResponse,
@ -9,16 +11,11 @@ from schemas import (
    TextResponse,
    VisionModelRequest,
 )
-import cv2 as cv
 import uvicorn

-from insightface.app import FaceAnalysis
-from transformers import pipeline
-from sentence_transformers import SentenceTransformer
-from transformers import Pipeline
 from PIL import Image
-from fastapi import FastAPI
-
+from fastapi import FastAPI, HTTPException
+from models import get_model, run_classification, run_facial_recognition

 classification_model = os.getenv(
    "MACHINE_LEARNING_CLASSIFICATION_MODEL", "microsoft/resnet-50"
@ -29,21 +26,20 @@ facial_recognition_model = os.getenv(
    "MACHINE_LEARNING_FACIAL_RECOGNITION_MODEL", "buffalo_l"
 )

-min_face_score = float(os.getenv("MACHINE_LEARNING_MIN_FACE_SCORE", 0.7))
 min_tag_score = float(os.getenv("MACHINE_LEARNING_MIN_TAG_SCORE", 0.9))
 eager_startup = (
    os.getenv("MACHINE_LEARNING_EAGER_STARTUP", "true") == "true"
 )  # loads all models at startup
+model_ttl = int(os.getenv("MACHINE_LEARNING_MODEL_TTL", 300))

-cache_folder = os.getenv("MACHINE_LEARNING_CACHE_FOLDER", "/cache")
-
-_model_cache = {}
-
+_model_cache = None
 app = FastAPI()


@app.on_event("startup")
 async def startup_event() -> None:
+    global _model_cache
+    _model_cache = ModelCache(ttl=model_ttl, revalidate=True)
    models = [
        (classification_model, "image-classification"),
        (clip_image_model, "clip"),
@ -54,9 +50,9 @@ async def startup_event() -> None:
    # Get all models
    for model_name, model_type in models:
        if eager_startup:
-            get_cached_model(model_name, model_type)
+            await _model_cache.get_cached_model(model_name, model_type)
        else:
-            _get_model(model_name, model_type)
+            get_model(model_name, model_type)


@app.get("/", response_model=MessageResponse)
@ -70,10 +66,14 @@ def ping() -> str:


@app.post("/image-classifier/tag-image", response_model=TagResponse, status_code=200)
-def image_classification(payload: VisionModelRequest) -> list[str]:
-    model = get_cached_model(classification_model, "image-classification")
-    assetPath = payload.image_path
-    labels = run_engine(model, assetPath)
+async def image_classification(payload: VisionModelRequest) -> list[str]:
+    if _model_cache is None:
+        raise HTTPException(status_code=500, detail="Unable to load model.")
+
+    model = await _model_cache.get_cached_model(
+        classification_model, "image-classification"
+    )
+    labels = run_classification(model, payload.image_path, min_tag_score)
    return labels


@ -82,10 +82,14 @@ def image_classification(payload: VisionModelRequest) -> list[str]:
    response_model=EmbeddingResponse,
    status_code=200,
 )
-def clip_encode_image(payload: VisionModelRequest) -> list[float]:
-    model = get_cached_model(clip_image_model, "clip")
+async def clip_encode_image(payload: VisionModelRequest) -> list[float]:
+    if _model_cache is None:
+        raise HTTPException(status_code=500, detail="Unable to load model.")
+
+    model = await _model_cache.get_cached_model(clip_image_model, "clip")
    image = Image.open(payload.image_path)
-    return model.encode(image).tolist()
+    embedding = model.encode(image).tolist()
+    return embedding


@app.post(
@ -93,82 +97,27 @@ def clip_encode_image(payload: VisionModelRequest) -> list[float]:
    response_model=EmbeddingResponse,
    status_code=200,
 )
-def clip_encode_text(payload: TextModelRequest) -> list[float]:
-    model = get_cached_model(clip_text_model, "clip")
-    return model.encode(payload.text).tolist()
+async def clip_encode_text(payload: TextModelRequest) -> list[float]:
+    if _model_cache is None:
+        raise HTTPException(status_code=500, detail="Unable to load model.")
+
+    model = await _model_cache.get_cached_model(clip_text_model, "clip")
+    embedding = model.encode(payload.text).tolist()
+    return embedding


@app.post(
    "/facial-recognition/detect-faces", response_model=FaceResponse, status_code=200
 )
-def facial_recognition(payload: VisionModelRequest) -> list[dict[str, Any]]:
-    model = get_cached_model(facial_recognition_model, "facial-recognition")
-    img = cv.imread(payload.image_path)
-    height, width, _ = img.shape
-    results = []
-    faces = model.get(img)
+async def facial_recognition(payload: VisionModelRequest) -> list[dict[str, Any]]:
+    if _model_cache is None:
+        raise HTTPException(status_code=500, detail="Unable to load model.")

-    for face in faces:
-        if face.det_score < min_face_score:
-            continue
-        x1, y1, x2, y2 = face.bbox
-
-        results.append(
-            {
-                "imageWidth": width,
-                "imageHeight": height,
-                "boundingBox": {
-                    "x1": round(x1),
-                    "y1": round(y1),
-                    "x2": round(x2),
-                    "y2": round(y2),
-                },
-                "score": face.det_score.item(),
-                "embedding": face.normed_embedding.tolist(),
-            }
-        )
-    return results
-
-
-def run_engine(engine: Pipeline, path: str) -> list[str]:
-    result: list[str] = []
-    predictions: list[dict[str, Any]] = engine(path)  # type: ignore
-
-    for pred in predictions:
-        tags = pred["label"].split(", ")
-        if pred["score"] > min_tag_score:
-            result = [*result, *tags]
-
-    if len(result) > 1:
-        result = list(set(result))
-
-    return result
-
-
-def get_cached_model(model, task) -> Any:
-    global _model_cache
-    key = "|".join([model, str(task)])
-    if key not in _model_cache:
-        model = _get_model(model, task)
-        _model_cache[key] = model
-
-    return _model_cache[key]
-
-
-def _get_model(model, task) -> Any:
-    match task:
-        case "facial-recognition":
-            model = FaceAnalysis(
-                name=model,
-                root=cache_folder,
-                allowed_modules=["detection", "recognition"],
-            )
-            model.prepare(ctx_id=0, det_size=(640, 640))
-        case "clip":
-            model = SentenceTransformer(model, cache_folder=cache_folder)
-        case _:
-            model = pipeline(model=model, task=task)
-    return model
+    model = await _model_cache.get_cached_model(
+        facial_recognition_model, "facial-recognition"
+    )
+    faces = run_facial_recognition(model, payload.image_path)
+    return faces


 if __name__ == "__main__":