feat(server,ml): remove image tagging (#5903)

* remove image tagging

* updated lock

* fixed tests, improved logging

* be nice

* fixed tests
This commit is contained in:
Mert 2023-12-20 20:47:56 -05:00 committed by GitHub
parent 154292242f
commit 092a23fd7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
65 changed files with 988 additions and 2930 deletions

View file

@ -59,3 +59,37 @@ def clip_preprocess_cfg() -> dict[str, Any]:
"resize_mode": "shortest",
"fill_color": 0,
}
@pytest.fixture(scope="session")
def clip_tokenizer_cfg() -> dict[str, Any]:
return {
"add_prefix_space": False,
"added_tokens_decoder": {
"49406": {
"content": "<|startoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
"49407": {
"content": "<|endoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": True,
"do_lower_case": True,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 77,
"pad_token": "<|endoftext|>",
"tokenizer_class": "CLIPTokenizer",
"unk_token": "<|endoftext|>",
}

View file

@ -6,7 +6,6 @@ from .base import InferenceModel
from .clip import MCLIPEncoder, OpenCLIPEncoder
from .constants import is_insightface, is_mclip, is_openclip
from .facial_recognition import FaceRecognizer
from .image_classification import ImageClassifier
def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any) -> InferenceModel:
@ -19,8 +18,6 @@ def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any)
case ModelType.FACIAL_RECOGNITION:
if is_insightface(model_name):
return FaceRecognizer(model_name, **model_kwargs)
case ModelType.IMAGE_CLASSIFICATION:
return ImageClassifier(model_name, **model_kwargs)
case _:
raise ValueError(f"Unknown model type {model_type}")

View file

@ -35,7 +35,7 @@ class InferenceModel(ABC):
)
log.debug(
(
f"Setting '{self.model_name}' execution providers to {self.providers}"
f"Setting '{self.model_name}' execution providers to {self.providers} "
"in descending order of preference"
),
)
@ -55,7 +55,7 @@ class InferenceModel(ABC):
def download(self) -> None:
if not self.cached:
log.info(
(f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'." "This may take a while.")
f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while."
)
self._download()
@ -63,7 +63,7 @@ class InferenceModel(ABC):
if self.loaded:
return
self.download()
log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}'")
log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
self._load()
self.loaded = True
@ -119,11 +119,11 @@ class InferenceModel(ABC):
def clear_cache(self) -> None:
if not self.cache_dir.exists():
log.warn(
f"Attempted to clear cache for model '{self.model_name}' but cache directory does not exist.",
f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
)
return
if not rmtree.avoids_symlink_attacks:
raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform.")
raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")
if self.cache_dir.is_dir():
log.info(f"Cleared cache directory for model '{self.model_name}'.")

View file

@ -8,11 +8,11 @@ from typing import Any, Literal
import numpy as np
import onnxruntime as ort
from PIL import Image
from transformers import AutoTokenizer
from tokenizers import Encoding, Tokenizer
from app.config import clean_name, log
from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
from app.schemas import ModelType, ndarray_f32, ndarray_i32, ndarray_i64
from app.schemas import ModelType, ndarray_f32, ndarray_i32
from .base import InferenceModel
@ -40,6 +40,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
log.debug(f"Loaded clip text model '{self.model_name}'")
if self.mode == "vision" or self.mode is None:
log.debug(f"Loading clip vision model '{self.model_name}'")
@ -50,6 +51,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
log.debug(f"Loaded clip vision model '{self.model_name}'")
def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
if isinstance(image_or_text, bytes):
@ -99,6 +101,14 @@ class BaseCLIPEncoder(InferenceModel):
def visual_path(self) -> Path:
return self.visual_dir / "model.onnx"
@property
def tokenizer_file_path(self) -> Path:
return self.textual_dir / "tokenizer.json"
@property
def tokenizer_cfg_path(self) -> Path:
return self.textual_dir / "tokenizer_config.json"
@property
def preprocess_cfg_path(self) -> Path:
return self.visual_dir / "preprocess_cfg.json"
@ -107,6 +117,34 @@ class BaseCLIPEncoder(InferenceModel):
def cached(self) -> bool:
return self.textual_path.is_file() and self.visual_path.is_file()
@cached_property
def model_cfg(self) -> dict[str, Any]:
log.debug(f"Loading model config for CLIP model '{self.model_name}'")
model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
return model_cfg
@cached_property
def tokenizer_file(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
tokenizer_file: dict[str, Any] = json.load(self.tokenizer_file_path.open())
log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
return tokenizer_file
@cached_property
def tokenizer_cfg(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
tokenizer_cfg: dict[str, Any] = json.load(self.tokenizer_cfg_path.open())
log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
return tokenizer_cfg
@cached_property
def preprocess_cfg(self) -> dict[str, Any]:
log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
return preprocess_cfg
class OpenCLIPEncoder(BaseCLIPEncoder):
def __init__(
@ -121,8 +159,8 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
def _load(self) -> None:
super()._load()
self.tokenizer = AutoTokenizer.from_pretrained(self.textual_dir)
self.sequence_length = self.model_cfg["text_cfg"]["context_length"]
context_length = self.model_cfg["text_cfg"]["context_length"]
pad_token = self.tokenizer_cfg["pad_token"]
self.size = (
self.preprocess_cfg["size"][0] if type(self.preprocess_cfg["size"]) == list else self.preprocess_cfg["size"]
@ -131,16 +169,16 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
self.mean = np.array(self.preprocess_cfg["mean"], dtype=np.float32)
self.std = np.array(self.preprocess_cfg["std"], dtype=np.float32)
log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
self.tokenizer: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
pad_id = self.tokenizer.token_to_id(pad_token)
self.tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=pad_id)
self.tokenizer.enable_truncation(max_length=context_length)
log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
input_ids: ndarray_i64 = self.tokenizer(
text,
max_length=self.sequence_length,
return_tensors="np",
return_attention_mask=False,
padding="max_length",
truncation=True,
).input_ids
return {"text": input_ids.astype(np.int32)}
tokens: Encoding = self.tokenizer.encode(text)
return {"text": np.array([tokens.ids], dtype=np.int32)}
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
image = resize(image, self.size)
@ -149,18 +187,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
image_np = normalize(image_np, self.mean, self.std)
return {"image": np.expand_dims(image_np.transpose(2, 0, 1), 0)}
@cached_property
def model_cfg(self) -> dict[str, Any]:
model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
return model_cfg
@cached_property
def preprocess_cfg(self) -> dict[str, Any]:
preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
return preprocess_cfg
class MCLIPEncoder(OpenCLIPEncoder):
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
tokens: dict[str, ndarray_i64] = self.tokenizer(text, return_tensors="np")
return {k: v.astype(np.int32) for k, v in tokens.items()}
tokens: Encoding = self.tokenizer.encode(text)
return {
"input_ids": np.array([tokens.ids], dtype=np.int32),
"attention_mask": np.array([tokens.attention_mask], dtype=np.int32),
}

View file

@ -1,75 +0,0 @@
from io import BytesIO
from pathlib import Path
from typing import Any
from huggingface_hub import snapshot_download
from optimum.onnxruntime import ORTModelForImageClassification
from optimum.pipelines import pipeline
from PIL import Image
from transformers import AutoImageProcessor
from ..config import log
from ..schemas import ModelType
from .base import InferenceModel
class ImageClassifier(InferenceModel):
_model_type = ModelType.IMAGE_CLASSIFICATION
def __init__(
self,
model_name: str,
min_score: float = 0.9,
cache_dir: Path | str | None = None,
**model_kwargs: Any,
) -> None:
self.min_score = model_kwargs.pop("minScore", min_score)
super().__init__(model_name, cache_dir, **model_kwargs)
def _download(self) -> None:
snapshot_download(
cache_dir=self.cache_dir,
repo_id=self.model_name,
allow_patterns=["*.bin", "*.json", "*.txt"],
local_dir=self.cache_dir,
local_dir_use_symlinks=True,
)
def _load(self) -> None:
processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
model_path = self.cache_dir / "model.onnx"
model_kwargs = {
"cache_dir": self.cache_dir,
"provider": self.providers[0],
"provider_options": self.provider_options[0],
"session_options": self.sess_options,
}
if model_path.exists():
model = ORTModelForImageClassification.from_pretrained(self.cache_dir, **model_kwargs)
self.model = pipeline(self.model_type.value, model, feature_extractor=processor)
else:
log.info(
(
f"ONNX model not found in cache directory for '{self.model_name}'."
"Exporting optimized model for future use."
),
)
self.sess_options.optimized_model_filepath = model_path.as_posix()
self.model = pipeline(
self.model_type.value,
self.model_name,
model_kwargs=model_kwargs,
feature_extractor=processor,
)
def _predict(self, image: Image.Image | bytes) -> list[str]:
if isinstance(image, bytes):
image = Image.open(BytesIO(image))
predictions: list[dict[str, Any]] = self.model(image)
tags = [tag for pred in predictions for tag in pred["label"].split(", ") if pred["score"] >= self.min_score]
return tags
def configure(self, **model_kwargs: Any) -> None:
self.min_score = model_kwargs.pop("minScore", self.min_score)

View file

@ -25,7 +25,6 @@ class BoundingBox(TypedDict):
class ModelType(StrEnum):
IMAGE_CLASSIFICATION = "image-classification"
CLIP = "clip"
FACIAL_RECOGNITION = "facial-recognition"

View file

@ -17,42 +17,9 @@ from .models.base import PicklableSessionOptions
from .models.cache import ModelCache
from .models.clip import OpenCLIPEncoder
from .models.facial_recognition import FaceRecognizer
from .models.image_classification import ImageClassifier
from .schemas import ModelType
class TestImageClassifier:
classifier_preds = [
{"label": "that's an image alright", "score": 0.8},
{"label": "well it ends with .jpg", "score": 0.1},
{"label": "idk, im just seeing bytes", "score": 0.05},
{"label": "not sure", "score": 0.04},
{"label": "probably a virus", "score": 0.01},
]
def test_min_score(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
mocker.patch.object(ImageClassifier, "load")
classifier = ImageClassifier("test_model_name", min_score=0.0)
assert classifier.min_score == 0.0
classifier.model = mock.Mock()
classifier.model.return_value = self.classifier_preds
all_labels = classifier.predict(pil_image)
classifier.min_score = 0.5
filtered_labels = classifier.predict(pil_image)
assert all_labels == [
"that's an image alright",
"well it ends with .jpg",
"idk",
"im just seeing bytes",
"not sure",
"probably a virus",
]
assert filtered_labels == ["that's an image alright"]
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@ -63,11 +30,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@ -85,11 +54,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@ -145,17 +116,15 @@ class TestFaceRecognition:
class TestCache:
async def test_caches(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
assert len(model_cache.cache._cache) == 1
mock_get_model.assert_called_once()
async def test_kwargs_used(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION, cache_dir="test_cache")
mock_get_model.assert_called_once_with(
ModelType.IMAGE_CLASSIFICATION, "test_model_name", cache_dir="test_cache"
)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION, cache_dir="test_cache")
mock_get_model.assert_called_once_with(ModelType.FACIAL_RECOGNITION, "test_model_name", cache_dir="test_cache")
async def test_different_clip(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
@ -172,14 +141,14 @@ class TestCache:
@mock.patch("app.models.cache.OptimisticLock", autospec=True)
async def test_model_ttl(self, mock_lock_cls: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_lock_cls.return_value.__aenter__.return_value.cas.assert_called_with(mock.ANY, ttl=100)
@mock.patch("app.models.cache.SimpleMemoryCache.expire")
async def test_revalidate(self, mock_cache_expire: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100, revalidate=True)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_cache_expire.assert_called_once_with(mock.ANY, 100)
@ -188,23 +157,6 @@ class TestCache:
reason="More time-consuming since it deploys the app and loads models.",
)
class TestEndpoints:
def test_tagging_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
response = deployed_app.post(
"http://localhost:3003/predict",
data={
"modelName": "microsoft/resnet-50",
"modelType": "image-classification",
"options": json.dumps({"minScore": 0.0}),
},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["image-classification"]
def test_clip_image_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None: