chore(deps): update machine-learning (#6302)

* chore(deps): update machine-learning * fix typing, use new lifespan syntax * wrap in try / finally * move log --------- Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
2025-10-17 18:19:27 +00:00 · 2024-01-13 05:00:09 +00:00 · 2024-01-13 05:00:09 +00:00 · 20be42cec0
commit 20be42cec0
parent bd5ae9f31e
12 changed files with 238 additions and 212 deletions
--- a/machine-learning/app/main.py
+++ b/machine-learning/app/main.py
@ -2,11 +2,11 @@ import asyncio
 import gc
 import os
 import signal
-import sys
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Iterator
+from contextlib import asynccontextmanager
+from typing import Any, AsyncGenerator, Iterator
 from zipfile import BadZipFile

 import orjson
@ -26,7 +26,6 @@ from .schemas import (
 )

 MultiPartParser.max_file_size = 2**26  # spools to disk if payload is 64 MiB or larger
-app = FastAPI()

 model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
 thread_pool: ThreadPoolExecutor | None = None
@ -35,8 +34,8 @@ active_requests = 0
 last_called: float | None = None


-@app.on_event("startup")
-def startup() -> None:
+@asynccontextmanager
+async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
    global thread_pool
    log.info(
        (
@ -44,21 +43,22 @@ def startup() -> None:
            f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
        )
    )
-    # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
-    thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
-    if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
-        asyncio.ensure_future(idle_shutdown_task())
-    log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
-
-
-@app.on_event("shutdown")
-def shutdown() -> None:
-    log.handlers.clear()
-    for model in model_cache.cache._cache.values():
-        del model
-    if thread_pool is not None:
-        thread_pool.shutdown()
-    gc.collect()
+    
+    try:
+        if settings.request_threads > 0:
+            # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
+            thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
+            log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
+        if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
+            asyncio.ensure_future(idle_shutdown_task())
+        yield
+    finally:
+        log.handlers.clear()
+        for model in model_cache.cache._cache.values():
+            del model
+        if thread_pool is not None:
+            thread_pool.shutdown()
+        gc.collect()


 def update_state() -> Iterator[None]:
@ -71,6 +71,9 @@ def update_state() -> Iterator[None]:
        active_requests -= 1


+app = FastAPI(lifespan=lifespan)
+
+
@app.get("/", response_model=MessageResponse)
 async def root() -> dict[str, str]:
    return {"message": "Immich ML"}