This commit is contained in:
Aleksander Pejcic 2025-10-17 11:16:22 -05:00 committed by GitHub
commit 4d2f5a753a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 55 additions and 18 deletions

View file

@@ -62,14 +62,6 @@ class OrtSession:
def _providers_default(self) -> list[str]: def _providers_default(self) -> list[str]:
available_providers = set(ort.get_available_providers()) available_providers = set(ort.get_available_providers())
log.debug(f"Available ORT providers: {available_providers}") log.debug(f"Available ORT providers: {available_providers}")
if (openvino := "OpenVINOExecutionProvider") in available_providers:
device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
log.debug(f"Available OpenVINO devices: {device_ids}")
gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")]
if not gpu_devices:
log.warning("No GPU device found in OpenVINO. Falling back to CPU.")
available_providers.remove(openvino)
return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers] return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers]
@property @property
@@ -91,8 +83,17 @@ class OrtSession:
case "CUDAExecutionProvider" | "ROCMExecutionProvider": case "CUDAExecutionProvider" | "ROCMExecutionProvider":
options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id} options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
case "OpenVINOExecutionProvider": case "OpenVINOExecutionProvider":
device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
# Check for available devices, preferring GPU over CPU
gpu_devices = [d for d in device_ids if d.startswith("GPU")]
if gpu_devices:
device_type = f"GPU.{settings.device_id}"
log.debug(f"OpenVINO: Using GPU device {device_type}")
else:
device_type = "CPU"
log.debug("OpenVINO: No GPU found, using CPU")
options = { options = {
"device_type": f"GPU.{settings.device_id}", "device_type": device_type,
"precision": "FP32", "precision": "FP32",
"cache_dir": (self.model_path.parent / "openvino").as_posix(), "cache_dir": (self.model_path.parent / "openvino").as_posix(),
} }
@@ -126,16 +127,28 @@ class OrtSession:
sess_options.enable_cpu_mem_arena = settings.model_arena sess_options.enable_cpu_mem_arena = settings.model_arena
# avoid thread contention between models # avoid thread contention between models
# Set inter_op threads
if settings.model_inter_op_threads > 0: if settings.model_inter_op_threads > 0:
sess_options.inter_op_num_threads = settings.model_inter_op_threads sess_options.inter_op_num_threads = settings.model_inter_op_threads
# these defaults work well for CPU, but bottleneck GPU # these defaults work well for CPU, but bottleneck GPU
elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
sess_options.inter_op_num_threads = 1 sess_options.inter_op_num_threads = 1
elif settings.model_inter_op_threads == 0 and (
"OpenVINOExecutionProvider" in self.providers
and self._provider_options[self.providers.index("OpenVINOExecutionProvider")].get("device_type") == "CPU"
):
sess_options.inter_op_num_threads = 1
# Set intra_op threads
if settings.model_intra_op_threads > 0: if settings.model_intra_op_threads > 0:
sess_options.intra_op_num_threads = settings.model_intra_op_threads sess_options.intra_op_num_threads = settings.model_intra_op_threads
elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
sess_options.intra_op_num_threads = 2 sess_options.intra_op_num_threads = 2
elif settings.model_intra_op_threads == 0 and (
"OpenVINOExecutionProvider" in self.providers
and self._provider_options[self.providers.index("OpenVINOExecutionProvider")].get("device_type") == "CPU"
):
sess_options.intra_op_num_threads = 1
if sess_options.inter_op_num_threads > 1: if sess_options.inter_op_num_threads > 1:
sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL

View file

@@ -201,13 +201,6 @@ class TestOrtSession:
assert session.providers == self.OV_EP assert session.providers == self.OV_EP
@pytest.mark.ov_device_ids(["CPU"])
@pytest.mark.providers(OV_EP)
def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], ov_device_ids: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CPU_EP
@pytest.mark.providers(CUDA_EP_OUT_OF_ORDER) @pytest.mark.providers(CUDA_EP_OUT_OF_ORDER)
def test_sets_providers_in_correct_order(self, providers: list[str]) -> None: def test_sets_providers_in_correct_order(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai") session = OrtSession("ViT-B-32__openai")
@@ -248,7 +241,8 @@ class TestOrtSession:
{"arena_extend_strategy": "kSameAsRequested"}, {"arena_extend_strategy": "kSameAsRequested"},
] ]
def test_sets_provider_options_for_openvino(self) -> None: @pytest.mark.ov_device_ids(["GPU.0", "GPU.1", "CPU"])
def test_sets_provider_options_for_openvino(self, ov_device_ids: list[str]) -> None:
model_path = "/cache/ViT-B-32__openai/textual/model.onnx" model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
@@ -262,6 +256,19 @@ class TestOrtSession:
} }
] ]
@pytest.mark.ov_device_ids(["CPU"])
def test_sets_provider_options_for_openvino_cpu(self, ov_device_ids: list[str]) -> None:
model_path = "/cache/ViT-B-32__openai/model.onnx"
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
assert session.provider_options == [
{
"device_type": "CPU",
"precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/openvino",
}
]
def test_sets_provider_options_for_cuda(self) -> None: def test_sets_provider_options_for_cuda(self) -> None:
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
@@ -279,7 +286,7 @@ class TestOrtSession:
def test_sets_provider_options_kwarg(self) -> None: def test_sets_provider_options_kwarg(self) -> None:
session = OrtSession( session = OrtSession(
"ViT-B-32__openai", "ViT-B-32__openai",
providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
provider_options=[], provider_options=[],
) )
@@ -292,6 +299,23 @@ class TestOrtSession:
assert session.sess_options.inter_op_num_threads == 1 assert session.sess_options.inter_op_num_threads == 1
assert session.sess_options.intra_op_num_threads == 2 assert session.sess_options.intra_op_num_threads == 2
@pytest.mark.ov_device_ids(["CPU"])
def test_sets_default_sess_options_if_openvino_cpu(self, ov_device_ids: list[str]) -> None:
model_path = "/cache/ViT-B-32__openai/model.onnx"
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
assert session.sess_options.inter_op_num_threads == 1
assert session.sess_options.intra_op_num_threads == 1
@pytest.mark.ov_device_ids(["GPU.0", "CPU"])
def test_sets_default_sess_options_if_openvino_gpu(self, ov_device_ids: list[str]) -> None:
model_path = "/cache/ViT-B-32__openai/model.onnx"
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
assert session.sess_options.inter_op_num_threads == 0
assert session.sess_options.intra_op_num_threads == 0
def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None: def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])