Mirror of https://github.com/immich-app/immich (synced 2025-10-17 18:19:27 +00:00)
feat(ml): coreml (#17718)

* coreml
* add test
* use arena by default in native installation
* fix tests
* add env to docs
* remove availability envs
parent 43eccca86a
commit 1b62c2ef55

6 changed files with 47 additions and 6 deletions
Docs (machine-learning environment variables table):

```diff
@@ -171,6 +171,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning |
 | `MACHINE_LEARNING_RKNN`                               | Enable RKNN hardware acceleration if supported                                                 | `True`                       | machine learning |
 | `MACHINE_LEARNING_RKNN_THREADS`                       | How many threads of the RKNN runtime to spin up while inferencing                              | `1`                          | machine learning |
+| `MACHINE_LEARNING_MODEL_ARENA`                        | Pre-allocates CPU memory to avoid memory fragmentation                                         | `true`                       | machine learning |
 
 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
 
```
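The new `MACHINE_LEARNING_MODEL_ARENA` variable is a plain boolean flag. As a minimal sketch of how it plausibly reaches the service, assuming the configuration is read with pydantic-settings under a `MACHINE_LEARNING_` env prefix (consistent with the variable names above and with the `Settings(BaseSettings)` class later in this diff; not Immich's exact configuration code):

```python
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    # Assumed mapping: MACHINE_LEARNING_MODEL_ARENA -> model_arena.
    # protected_namespaces=() silences pydantic's "model_" namespace warning.
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_", protected_namespaces=())

    model_arena: bool = True  # arena enabled by default, as in the Settings hunk below


os.environ["MACHINE_LEARNING_MODEL_ARENA"] = "false"
print(Settings().model_arena)  # False: the env var overrides the default
```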
Dockerfile (prod-cpu stage). The Docker images opt out of the arena, while the native installation keeps it enabled through the `Settings` default shown further down (per the commit message: "use arena by default in native installation"):

```diff
@@ -70,7 +70,8 @@ RUN if [ "$DEVICE" = "rocm" ]; then \
 
 FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-cpu
 
-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
 
 FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-openvino
 
```
Dockerfile (prod-cuda stage):

```diff
@@ -88,7 +89,8 @@ RUN apt-get update && \
 
 FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04@sha256:94c1577b2cd9dd6c0312dc04dff9cb2fdce2b268018abc3d7c2dbcacf1155000 AS prod-cuda
 
-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
 
 RUN apt-get update && \
     apt-get install --no-install-recommends -yqq libcudnn9-cuda-12 && \
```
Dockerfile (prod-armnn stage):

```diff
@@ -104,7 +106,8 @@ FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:1f7e92ca7e3a3785680473329ed1091
 FROM prod-cpu AS prod-armnn
 
 ENV LD_LIBRARY_PATH=/opt/armnn \
-    LD_PRELOAD=/usr/lib/libmimalloc.so.2
+    LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
 
 RUN apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd libgomp1 && \
     rm -rf /var/lib/apt/lists/* && \
```
Dockerfile (prod-rknn stage):

```diff
@@ -127,7 +130,8 @@ FROM prod-cpu AS prod-rknn
 # renovate: datasource=github-tags depName=airockchip/rknn-toolkit2
 ARG RKNN_TOOLKIT_VERSION="v2.3.0"
 
-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
 
 ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 "https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/${RKNN_TOOLKIT_VERSION}/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so" /usr/lib/
 
```
Python config (Settings):

```diff
@@ -61,6 +61,7 @@ class Settings(BaseSettings):
     request_threads: int = os.cpu_count() or 4
     model_inter_op_threads: int = 0
     model_intra_op_threads: int = 0
+    model_arena: bool = True
     ann: bool = True
     ann_fp16_turbo: bool = False
     ann_tuning_level: int = 2
```
Python (supported ONNX Runtime execution providers):

```diff
@@ -79,6 +79,7 @@ SUPPORTED_PROVIDERS = [
     "CUDAExecutionProvider",
     "ROCMExecutionProvider",
     "OpenVINOExecutionProvider",
+    "CoreMLExecutionProvider",
     "CPUExecutionProvider",
 ]
 
```
Python (OrtSession per-provider options):

```diff
@@ -96,6 +96,14 @@ class OrtSession:
                         "precision": "FP32",
                         "cache_dir": (self.model_path.parent / "openvino").as_posix(),
                     }
+                case "CoreMLExecutionProvider":
+                    options = {
+                        "ModelFormat": "MLProgram",
+                        "MLComputeUnits": "ALL",
+                        "SpecializationStrategy": "FastPrediction",
+                        "AllowLowPrecisionAccumulationOnGPU": "1",
+                        "ModelCacheDirectory": (self.model_path.parent / "coreml").as_posix(),
+                    }
                 case _:
                     options = {}
             provider_options.append(options)
```
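In onnxruntime, `providers` and `provider_options` are parallel lists: the options dict built above is matched positionally to its provider when the session is created. A minimal sketch of that wiring, with hypothetical paths and a CoreML-enabled onnxruntime build assumed (not Immich's exact code):

```python
import onnxruntime as ort

model_path = "/cache/clip/ViT-B-32__openai/model.onnx"  # hypothetical model path

providers = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
provider_options = [
    {
        # Same CoreML options as in the hunk above.
        "ModelFormat": "MLProgram",
        "MLComputeUnits": "ALL",
        "SpecializationStrategy": "FastPrediction",
        "AllowLowPrecisionAccumulationOnGPU": "1",
        "ModelCacheDirectory": "/cache/clip/coreml",  # hypothetical cache dir
    },
    {},  # the CPU fallback takes no options here
]

session = ort.InferenceSession(model_path, providers=providers, provider_options=provider_options)
print(session.get_providers())  # CoreML first, CPU as fallback
```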
Python (OrtSession default session options):

```diff
@@ -115,7 +123,7 @@ class OrtSession:
     @property
     def _sess_options_default(self) -> ort.SessionOptions:
         sess_options = ort.SessionOptions()
-        sess_options.enable_cpu_mem_arena = False
+        sess_options.enable_cpu_mem_arena = settings.model_arena
 
         # avoid thread contention between models
         if settings.model_inter_op_threads > 0:
```
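`enable_cpu_mem_arena` controls ONNX Runtime's CPU memory arena: it pre-allocates a pool to cut allocation overhead and fragmentation, at the cost of holding on to memory. That trade-off is why the Docker stages above ship with `MACHINE_LEARNING_MODEL_ARENA=false` while native installs keep the `true` default. A standalone sketch of the toggle (not Immich's code):

```python
import onnxruntime as ort


def make_sess_options(model_arena: bool) -> ort.SessionOptions:
    # Mirrors the change above: the arena follows the setting
    # instead of being hardcoded to False.
    sess_options = ort.SessionOptions()
    sess_options.enable_cpu_mem_arena = model_arena
    return sess_options


print(make_sess_options(True).enable_cpu_mem_arena)   # True: native default
print(make_sess_options(False).enable_cpu_mem_arena)  # False: Docker default
```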
Tests (execution-provider lists):

```diff
@@ -180,6 +180,7 @@ class TestOrtSession:
     CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"]
     TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
     ROCM_EP = ["ROCMExecutionProvider", "CPUExecutionProvider"]
+    COREML_EP = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
 
     @pytest.mark.providers(CPU_EP)
     def test_sets_cpu_provider(self, providers: list[str]) -> None:
```
Tests (CoreML provider selection):

```diff
@@ -225,6 +226,12 @@ class TestOrtSession:
 
         assert session.providers == self.ROCM_EP
 
+    @pytest.mark.providers(COREML_EP)
+    def test_uses_coreml(self, providers: list[str]) -> None:
+        session = OrtSession("ViT-B-32__openai")
+
+        assert session.providers == self.COREML_EP
+
     def test_sets_provider_kwarg(self) -> None:
         providers = ["CUDAExecutionProvider"]
         session = OrtSession("ViT-B-32__openai", providers=providers)
```
Tests (the arena is no longer unconditionally disabled):

```diff
@@ -284,7 +291,6 @@ class TestOrtSession:
         assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
         assert session.sess_options.inter_op_num_threads == 1
         assert session.sess_options.intra_op_num_threads == 2
-        assert session.sess_options.enable_cpu_mem_arena is False
 
     def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
         session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
```
Tests (arena toggle):

```diff
@@ -302,6 +308,26 @@ class TestOrtSession:
         assert session.sess_options.inter_op_num_threads == 2
         assert session.sess_options.intra_op_num_threads == 4
 
+    def test_uses_arena_if_enabled(self, mocker: MockerFixture) -> None:
+        mock_settings = mocker.patch("immich_ml.sessions.ort.settings", autospec=True)
+        mock_settings.model_inter_op_threads = 0
+        mock_settings.model_intra_op_threads = 0
+        mock_settings.model_arena = True
+
+        session = OrtSession("ViT-B-32__openai", providers=["CPUExecutionProvider"])
+
+        assert session.sess_options.enable_cpu_mem_arena
+
+    def test_does_not_use_arena_if_disabled(self, mocker: MockerFixture) -> None:
+        mock_settings = mocker.patch("immich_ml.sessions.ort.settings", autospec=True)
+        mock_settings.model_inter_op_threads = 0
+        mock_settings.model_intra_op_threads = 0
+        mock_settings.model_arena = False
+
+        session = OrtSession("ViT-B-32__openai", providers=["CPUExecutionProvider"])
+
+        assert not session.sess_options.enable_cpu_mem_arena
+
     def test_sets_sess_options_kwarg(self) -> None:
         sess_options = ort.SessionOptions()
         session = OrtSession(
```