diff --git a/machine-learning/immich_ml/sessions/ort.py b/machine-learning/immich_ml/sessions/ort.py index d18aae751a..a637a13fb3 100644 --- a/machine-learning/immich_ml/sessions/ort.py +++ b/machine-learning/immich_ml/sessions/ort.py @@ -62,14 +62,6 @@ class OrtSession: def _providers_default(self) -> list[str]: available_providers = set(ort.get_available_providers()) log.debug(f"Available ORT providers: {available_providers}") - if (openvino := "OpenVINOExecutionProvider") in available_providers: - device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() - log.debug(f"Available OpenVINO devices: {device_ids}") - - gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")] - if not gpu_devices: - log.warning("No GPU device found in OpenVINO. Falling back to CPU.") - available_providers.remove(openvino) return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers] @property @@ -91,8 +83,17 @@ class OrtSession: case "CUDAExecutionProvider" | "ROCMExecutionProvider": options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id} case "OpenVINOExecutionProvider": + device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() + # Check for available devices, preferring GPU over CPU + gpu_devices = [d for d in device_ids if d.startswith("GPU")] + if gpu_devices: + device_type = f"GPU.{settings.device_id}" + log.debug(f"OpenVINO: Using GPU device {device_type}") + else: + device_type = "CPU" + log.debug("OpenVINO: No GPU found, using CPU") options = { - "device_type": f"GPU.{settings.device_id}", + "device_type": device_type, "precision": "FP32", "cache_dir": (self.model_path.parent / "openvino").as_posix(), } @@ -126,16 +127,28 @@ class OrtSession: sess_options.enable_cpu_mem_arena = settings.model_arena # avoid thread contention between models + # Set inter_op threads if settings.model_inter_op_threads > 0: sess_options.inter_op_num_threads = settings.model_inter_op_threads # these defaults work well for CPU, but bottleneck GPU elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: sess_options.inter_op_num_threads = 1 + elif settings.model_inter_op_threads == 0 and ( + "OpenVINOExecutionProvider" in self.providers + and self._provider_options[self.providers.index("OpenVINOExecutionProvider")].get("device_type") == "CPU" + ): + sess_options.inter_op_num_threads = 1 + # Set intra_op threads if settings.model_intra_op_threads > 0: sess_options.intra_op_num_threads = settings.model_intra_op_threads elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: sess_options.intra_op_num_threads = 2 + elif settings.model_intra_op_threads == 0 and ( + "OpenVINOExecutionProvider" in self.providers + and self._provider_options[self.providers.index("OpenVINOExecutionProvider")].get("device_type") == "CPU" + ): + sess_options.intra_op_num_threads = 1 if sess_options.inter_op_num_threads > 1: sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL diff --git a/machine-learning/test_main.py b/machine-learning/test_main.py index 582a05a950..9aa611622c 100644 --- a/machine-learning/test_main.py +++ b/machine-learning/test_main.py @@ -201,13 +201,6 @@ class TestOrtSession: assert session.providers == self.OV_EP - @pytest.mark.ov_device_ids(["CPU"]) - @pytest.mark.providers(OV_EP) - def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], ov_device_ids: list[str]) -> None: - session = OrtSession("ViT-B-32__openai") - - assert session.providers == self.CPU_EP - @pytest.mark.providers(CUDA_EP_OUT_OF_ORDER) def test_sets_providers_in_correct_order(self, providers: list[str]) -> None: session = OrtSession("ViT-B-32__openai") @@ -248,7 +241,8 @@ class TestOrtSession: {"arena_extend_strategy": "kSameAsRequested"}, ] - def test_sets_provider_options_for_openvino(self) -> None: + @pytest.mark.ov_device_ids(["GPU.0", "GPU.1", "CPU"]) + def test_sets_provider_options_for_openvino(self, ov_device_ids: list[str]) -> None: model_path = "/cache/ViT-B-32__openai/textual/model.onnx" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" @@ -262,6 +256,19 @@ class TestOrtSession: } ] + @pytest.mark.ov_device_ids(["CPU"]) + def test_sets_provider_options_for_openvino_cpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.provider_options == [ + { + "device_type": "CPU", + "precision": "FP32", + "cache_dir": "/cache/ViT-B-32__openai/openvino", + } + ] + def test_sets_provider_options_for_cuda(self) -> None: os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" @@ -279,7 +286,7 @@ class TestOrtSession: def test_sets_provider_options_kwarg(self) -> None: session = OrtSession( "ViT-B-32__openai", - providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], + providers=["CUDAExecutionProvider", "CPUExecutionProvider"], provider_options=[], ) @@ -292,6 +299,23 @@ class TestOrtSession: assert session.sess_options.inter_op_num_threads == 1 assert session.sess_options.intra_op_num_threads == 2 + @pytest.mark.ov_device_ids(["CPU"]) + def test_sets_default_sess_options_if_openvino_cpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL + assert session.sess_options.inter_op_num_threads == 1 + assert session.sess_options.intra_op_num_threads == 1 + + @pytest.mark.ov_device_ids(["GPU.0", "CPU"]) + def test_sets_default_sess_options_if_openvino_gpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.sess_options.inter_op_num_threads == 0 + assert session.sess_options.intra_op_num_threads == 0 + def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None: session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])