From 0145c04f0ac312f2e9d91acb7e65caadb9e4b193 Mon Sep 17 00:00:00 2001
From: Aleksander
Date: Wed, 15 Oct 2025 03:21:07 -0700
Subject: [PATCH] Enable OpenVINO CPU acceleration in Immich

---
 machine-learning/immich_ml/sessions/ort.py | 32 +++++++++++++++++++---
 machine-learning/test_main.py              | 30 ++++++++++++++++++++
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/machine-learning/immich_ml/sessions/ort.py b/machine-learning/immich_ml/sessions/ort.py
index d18aae751a..b12c210b06 100644
--- a/machine-learning/immich_ml/sessions/ort.py
+++ b/machine-learning/immich_ml/sessions/ort.py
@@ -66,9 +66,8 @@ class OrtSession:
             device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
             log.debug(f"Available OpenVINO devices: {device_ids}")
 
-            gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")]
-            if not gpu_devices:
-                log.warning("No GPU device found in OpenVINO. Falling back to CPU.")
+            if not device_ids:
+                log.warning("No device found in OpenVINO. Falling back to CPU.")
                 available_providers.remove(openvino)
 
         return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers]
@@ -91,8 +90,17 @@ class OrtSession:
             case "CUDAExecutionProvider" | "ROCMExecutionProvider":
                 options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
             case "OpenVINOExecutionProvider":
+                device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
+                # Check for available devices, preferring GPU over CPU
+                gpu_devices = [d for d in device_ids if d.startswith("GPU")]
+                if gpu_devices:
+                    device_type = f"GPU.{settings.device_id}"
+                    log.debug(f"OpenVINO: Using GPU device {device_type}")
+                else:
+                    device_type = "CPU"
+                    log.debug("OpenVINO: No GPU found, using CPU")
                 options = {
-                    "device_type": f"GPU.{settings.device_id}",
+                    "device_type": device_type,
                     "precision": "FP32",
                     "cache_dir": (self.model_path.parent / "openvino").as_posix(),
                 }
@@ -126,18 +134,34 @@ class OrtSession:
         sess_options.enable_cpu_mem_arena = settings.model_arena
 
         # avoid thread contention between models
+        # Set inter_op threads
         if settings.model_inter_op_threads > 0:
             sess_options.inter_op_num_threads = settings.model_inter_op_threads
         # these defaults work well for CPU, but bottleneck GPU
         elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
             sess_options.inter_op_num_threads = 1
+        elif settings.model_inter_op_threads == 0 and (
+            "OpenVINOExecutionProvider" in self.providers and self._provider_options[0].get("device_type") == "CPU"
+        ):
+            sess_options.inter_op_num_threads = 1
 
+        # Set intra_op threads
         if settings.model_intra_op_threads > 0:
             sess_options.intra_op_num_threads = settings.model_intra_op_threads
         elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
             sess_options.intra_op_num_threads = 2
+        elif settings.model_intra_op_threads == 0 and (
+            "OpenVINOExecutionProvider" in self.providers and self._provider_options[0].get("device_type") == "CPU"
+        ):
+            sess_options.intra_op_num_threads = 1
 
         if sess_options.inter_op_num_threads > 1:
             sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
 
+        log.debug(
+            f"_sess_options_default returning: "
+            f"inter_op_num_threads={getattr(sess_options, 'inter_op_num_threads', None)}, "
+            f"intra_op_num_threads={getattr(sess_options, 'intra_op_num_threads', None)}, "
+            f"execution_mode={getattr(sess_options, 'execution_mode', None)}"
+        )
         return sess_options
diff --git a/machine-learning/test_main.py b/machine-learning/test_main.py
index 582a05a950..20f9ec9ac9 100644
--- a/machine-learning/test_main.py
+++ b/machine-learning/test_main.py
@@ -262,6 +262,19 @@ class TestOrtSession:
             }
         ]
 
+    @pytest.mark.ov_device_ids(["CPU"])
+    def test_sets_provider_options_for_openvino_cpu(self, ov_device_ids: list[str]) -> None:
+        model_path = "/cache/ViT-B-32__openai/model.onnx"
+        session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
+
+        assert session.provider_options == [
+            {
+                "device_type": "CPU",
+                "precision": "FP32",
+                "cache_dir": "/cache/ViT-B-32__openai/openvino",
+            }
+        ]
+
     def test_sets_provider_options_for_cuda(self) -> None:
         os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
 
@@ -292,6 +305,23 @@ class TestOrtSession:
         assert session.sess_options.inter_op_num_threads == 1
         assert session.sess_options.intra_op_num_threads == 2
 
+    @pytest.mark.ov_device_ids(["CPU"])
+    def test_sets_default_sess_options_if_openvino_cpu(self, ov_device_ids: list[str]) -> None:
+        model_path = "/cache/ViT-B-32__openai/model.onnx"
+        session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
+
+        assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
+        assert session.sess_options.inter_op_num_threads == 1
+        assert session.sess_options.intra_op_num_threads == 1
+
+    @pytest.mark.ov_device_ids(["GPU.0", "CPU"])
+    def test_sets_default_sess_options_if_openvino_gpu(self, ov_device_ids: list[str]) -> None:
+        model_path = "/cache/ViT-B-32__openai/model.onnx"
+        session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
+
+        assert session.sess_options.inter_op_num_threads == 0
+        assert session.sess_options.intra_op_num_threads == 0
+
     def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
         session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])