diff --git a/machine-learning/immich_ml/config.py b/machine-learning/immich_ml/config.py index 939afbc98b..c4d13ee7d2 100644 --- a/machine-learning/immich_ml/config.py +++ b/machine-learning/immich_ml/config.py @@ -41,6 +41,7 @@ class PreloadModelData(BaseModel): class MaxBatchSize(BaseModel): facial_recognition: int | None = None + text_recognition: int | None = None class Settings(BaseSettings): diff --git a/machine-learning/immich_ml/models/__init__.py b/machine-learning/immich_ml/models/__init__.py index 527bbdb907..6c3a74c963 100644 --- a/machine-learning/immich_ml/models/__init__.py +++ b/machine-learning/immich_ml/models/__init__.py @@ -3,12 +3,14 @@ from typing import Any from immich_ml.models.base import InferenceModel from immich_ml.models.clip.textual import MClipTextualEncoder, OpenClipTextualEncoder from immich_ml.models.clip.visual import OpenClipVisualEncoder +from immich_ml.models.ocr.detection import TextDetector +from immich_ml.models.ocr.recognition import TextRecognizer from immich_ml.schemas import ModelSource, ModelTask, ModelType from .constants import get_model_source from .facial_recognition.detection import FaceDetector from .facial_recognition.recognition import FaceRecognizer -from .ocr.paddle import PaddleOCRecognizer + def get_model_class(model_name: str, model_type: ModelType, model_task: ModelTask) -> type[InferenceModel]: source = get_model_source(model_name) @@ -28,8 +30,11 @@ def get_model_class(model_name: str, model_type: ModelType, model_task: ModelTas case ModelSource.INSIGHTFACE, ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION: return FaceRecognizer - case ModelSource.PADDLE, ModelType.OCR, ModelTask.OCR: - return PaddleOCRecognizer + case ModelSource.PADDLE, ModelType.DETECTION, ModelTask.OCR: + return TextDetector + + case ModelSource.PADDLE, ModelType.RECOGNITION, ModelTask.OCR: + return TextRecognizer case _: raise ValueError(f"Unknown model combination: {source}, {model_type}, {model_task}") diff --git a/machine-learning/immich_ml/models/base.py b/machine-learning/immich_ml/models/base.py index 3ee701fae0..5c40b21f32 100644 --- a/machine-learning/immich_ml/models/base.py +++ b/machine-learning/immich_ml/models/base.py @@ -38,9 +38,8 @@ class InferenceModel(ABC): def download(self) -> None: if not self.cached: - log.info( - f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while." - ) + model_type = self.model_type.replace("-", " ") + log.info(f"Downloading {model_type} model '{self.model_name}' to {self.model_path}. This may take a while.") self._download() def load(self) -> None: diff --git a/machine-learning/immich_ml/models/ocr/detection.py b/machine-learning/immich_ml/models/ocr/detection.py new file mode 100644 index 0000000000..2b80cea9fd --- /dev/null +++ b/machine-learning/immich_ml/models/ocr/detection.py @@ -0,0 +1,79 @@ +from typing import Any + +import numpy as np +from PIL import Image +from rapidocr.ch_ppocr_det import TextDetector as RapidTextDetector +from rapidocr.inference_engine.base import FileInfo, InferSession +from rapidocr.utils import DownloadFile, DownloadFileInput +from rapidocr.utils.typings import EngineType, LangDet, OCRVersion, TaskType +from rapidocr.utils.typings import ModelType as RapidModelType + +from immich_ml.config import log +from immich_ml.models.base import InferenceModel +from immich_ml.models.transforms import decode_cv2 +from immich_ml.schemas import ModelSession, ModelTask, ModelType + +from .schemas import OcrOptions, TextDetectionOutput + + +class TextDetector(InferenceModel): + depends = [] + identity = (ModelType.DETECTION, ModelTask.OCR) + + def __init__(self, model_name: str, **model_kwargs: Any) -> None: + super().__init__(model_name, **model_kwargs) + self.max_resolution = 1440 + self.min_score = 0.5 + self.score_mode = "fast" + self._empty: TextDetectionOutput = { + "resized": np.empty(0, dtype=np.float32), + "boxes": np.empty(0, dtype=np.float32), + "scores": (), + } + + def _download(self) -> None: + model_info = InferSession.get_model_url( + FileInfo( + engine_type=EngineType.ONNXRUNTIME, + ocr_version=OCRVersion.PPOCRV5, + task_type=TaskType.DET, + lang_type=LangDet.CH, + model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER, + ) + ) + download_params = DownloadFileInput( + file_url=model_info["model_dir"], + sha256=model_info["SHA256"], + save_path=self.model_path, + logger=log, + ) + DownloadFile.run(download_params) + + def _load(self) -> ModelSession: + session = self._make_session(self.model_path) + self.model = RapidTextDetector( + OcrOptions( + session=session.session, + limit_side_len=self.max_resolution, + limit_type="min", + box_thresh=self.min_score, + score_mode=self.score_mode, + ) + ) + return session + + def configure(self, **kwargs: Any) -> None: + self.max_resolution = kwargs.get("maxResolution", self.max_resolution) + self.min_score = kwargs.get("minScore", self.min_score) + self.score_mode = kwargs.get("scoreMode", self.score_mode) + + def _predict(self, inputs: bytes | Image.Image, **kwargs: Any) -> TextDetectionOutput: + results = self.model(decode_cv2(inputs)) + if results.boxes is None or results.scores is None or results.img is None: + return self._empty + log.info(f"{results.boxes=}, {results.scores=}") + return { + "resized": results.img, + "boxes": np.array(results.boxes, dtype=np.float32), + "scores": np.array(results.scores, dtype=np.float32), + } diff --git a/machine-learning/immich_ml/models/ocr/paddle.py b/machine-learning/immich_ml/models/ocr/paddle.py deleted file mode 100644 index 801f5b5281..0000000000 --- a/machine-learning/immich_ml/models/ocr/paddle.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Any, List - -import numpy as np -from numpy.typing import NDArray -from paddleocr import PaddleOCR -from PIL import Image -from immich_ml.models.base import InferenceModel -from immich_ml.models.transforms import decode_cv2 -from immich_ml.schemas import OCROutput, ModelTask, ModelType - -class PaddleOCRecognizer(InferenceModel): - depends = [] - identity = (ModelType.OCR, ModelTask.OCR) - - def __init__(self, model_name: str, **model_kwargs: Any) -> None: - self.orientation_classify_enabled = model_kwargs.get("orientationClassifyEnabled", False) - self.unwarping_enabled = model_kwargs.get("unwarpingEnabled", False) - super().__init__(model_name, **model_kwargs) - self._load() - self.loaded = True - - def _load(self) -> PaddleOCR: - self.model = PaddleOCR( - text_detection_model_name=f"{self.model_name}_det", - text_recognition_model_name=f"{self.model_name}_rec", - use_doc_orientation_classify=self.orientation_classify_enabled, - use_doc_unwarping=self.unwarping_enabled, - ) - - def configure(self, **kwargs: Any) -> None: - self.min_detection_score = kwargs.get("minDetectionScore", 0.3) - self.min_detection_box_score = kwargs.get("minDetectionBoxScore", 0.6) - self.min_recognition_score = kwargs.get("minRecognitionScore", 0.0) - - def _predict(self, inputs: NDArray[np.uint8] | bytes | Image.Image, **kwargs: Any) -> List[OCROutput]: - inputs = decode_cv2(inputs) - results = self.model.predict( - inputs, - text_det_thresh=self.min_detection_score, - text_det_box_thresh=self.min_detection_box_score, - text_rec_score_thresh=self.min_recognition_score - ) - return [ - OCROutput( - text=text, confidence=score, - x1=box[0][0], y1=box[0][1], x2=box[1][0], y2=box[1][1], - x3=box[2][0], y3=box[2][1], x4=box[3][0], y4=box[3][1] - ) - for result in results - for text, score, box in zip(result['rec_texts'], result['rec_scores'], result['rec_polys']) - ] diff --git a/machine-learning/immich_ml/models/ocr/recognition.py b/machine-learning/immich_ml/models/ocr/recognition.py new file mode 100644 index 0000000000..74b3862dca --- /dev/null +++ b/machine-learning/immich_ml/models/ocr/recognition.py @@ -0,0 +1,115 @@ +from typing import Any + +import cv2 +import numpy as np +from PIL.Image import Image +from rapidocr.ch_ppocr_rec import TextRecInput +from rapidocr.ch_ppocr_rec import TextRecognizer as RapidTextRecognizer +from rapidocr.inference_engine.base import FileInfo, InferSession +from rapidocr.utils import DownloadFile, DownloadFileInput +from rapidocr.utils.typings import EngineType, LangDet, OCRVersion, TaskType +from rapidocr.utils.typings import ModelType as RapidModelType + +from immich_ml.config import log, settings +from immich_ml.models.base import InferenceModel +from immich_ml.schemas import ModelSession, ModelTask, ModelType + +from .schemas import OcrOptions, TextDetectionOutput, TextRecognitionOutput + + +class TextRecognizer(InferenceModel): + depends = [(ModelType.DETECTION, ModelTask.OCR)] + identity = (ModelType.RECOGNITION, ModelTask.OCR) + + def __init__(self, model_name: str, **model_kwargs: Any) -> None: + self.min_score = model_kwargs.get("minScore", 0.5) + self._empty: TextRecognitionOutput = { + "box": np.empty(0, dtype=np.float32), + "boxScore": [], + "text": [], + "textScore": [], + } + super().__init__(model_name, **model_kwargs) + + def _download(self) -> None: + model_info = InferSession.get_model_url( + FileInfo( + engine_type=EngineType.ONNXRUNTIME, + ocr_version=OCRVersion.PPOCRV5, + task_type=TaskType.REC, + lang_type=LangDet.CH, + model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER, + ) + ) + download_params = DownloadFileInput( + file_url=model_info["model_dir"], + sha256=model_info["SHA256"], + save_path=self.model_path, + logger=log, + ) + DownloadFile.run(download_params) + + def _load(self) -> ModelSession: + session = self._make_session(self.model_path) + self.model = RapidTextRecognizer( + OcrOptions( + session=session.session, + rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6, + rec_img_shape=(3, 48, 320), + ) + ) + return session + + def configure(self, **kwargs: Any) -> None: + self.min_score = kwargs.get("minScore", self.min_score) + + def _predict(self, _: Image, texts: TextDetectionOutput, **kwargs: Any) -> TextRecognitionOutput: + boxes, resized_img, box_scores = texts["boxes"], texts["resized"], texts["scores"] + if boxes.shape[0] == 0: + return self._empty + rec = self.model(TextRecInput(img=self.get_crop_img_list(resized_img, boxes))) + if rec.txts is None: + return self._empty + + height, width = resized_img.shape[0:2] + log.info(f"Image shape: width={width}, height={height}") + boxes[:, :, 0] /= width + boxes[:, :, 1] /= height + + text_scores = np.array(rec.scores) + valid_text_score_idx = text_scores > 0.5 + valid_score_idx_list = valid_text_score_idx.tolist() + return { + "box": boxes.reshape(-1, 8)[valid_text_score_idx], + "text": [rec.txts[i] for i in range(len(rec.txts)) if valid_score_idx_list[i]], + "boxScore": box_scores[valid_text_score_idx], + "textScore": text_scores[valid_text_score_idx], + } + + def get_crop_img_list(self, img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]: + img_crop_width = np.maximum( + np.linalg.norm(boxes[:, 1] - boxes[:, 0], axis=1), np.linalg.norm(boxes[:, 2] - boxes[:, 3], axis=1) + ).astype(np.int32) + img_crop_height = np.maximum( + np.linalg.norm(boxes[:, 0] - boxes[:, 3], axis=1), np.linalg.norm(boxes[:, 1] - boxes[:, 2], axis=1) + ).astype(np.int32) + pts_std = np.zeros((img_crop_width.shape[0], 4, 2), dtype=np.float32) + pts_std[:, 1:3, 0] = img_crop_width[:, None] + pts_std[:, 2:4, 1] = img_crop_height[:, None] + + img_crop_sizes = np.stack([img_crop_width, img_crop_height], axis=1).tolist() + imgs = [] + for box, pts_std, dst_size in zip(list(boxes), list(pts_std), img_crop_sizes): + M = cv2.getPerspectiveTransform(box, pts_std) + dst_img = cv2.warpPerspective( + img, + M, + dst_size, + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC, + ) + dst_height, dst_width = dst_img.shape[0:2] + if dst_height * 1.0 / dst_width >= 1.5: + dst_img = np.rot90(dst_img) + imgs.append(dst_img) + return imgs diff --git a/machine-learning/immich_ml/models/ocr/schemas.py b/machine-learning/immich_ml/models/ocr/schemas.py new file mode 100644 index 0000000000..bacc60bb2a --- /dev/null +++ b/machine-learning/immich_ml/models/ocr/schemas.py @@ -0,0 +1,26 @@ +from typing import Iterable + +import numpy as np +import numpy.typing as npt +from rapidocr.utils.typings import EngineType +from typing_extensions import TypedDict + + +class TextDetectionOutput(TypedDict): + resized: npt.NDArray[np.float32] + boxes: npt.NDArray[np.float32] + scores: Iterable[float] + + +class TextRecognitionOutput(TypedDict): + box: npt.NDArray[np.float32] + boxScore: Iterable[float] + text: Iterable[str] + textScore: Iterable[float] + + +# RapidOCR expects engine_type to be an attribute +class OcrOptions(dict): + def __init__(self, **options): + super().__init__(**options) + self.engine_type = EngineType.ONNXRUNTIME diff --git a/machine-learning/immich_ml/schemas.py b/machine-learning/immich_ml/schemas.py index e95b51b11d..5cf5a6bc35 100644 --- a/machine-learning/immich_ml/schemas.py +++ b/machine-learning/immich_ml/schemas.py @@ -25,6 +25,7 @@ class ModelTask(StrEnum): SEARCH = "clip" OCR = "ocr" + class ModelType(StrEnum): DETECTION = "detection" RECOGNITION = "recognition" @@ -32,6 +33,7 @@ class ModelType(StrEnum): VISUAL = "visual" OCR = "ocr" + class ModelFormat(StrEnum): ARMNN = "armnn" ONNX = "onnx" @@ -44,6 +46,7 @@ class ModelSource(StrEnum): OPENCLIP = "openclip" PADDLE = "paddle" + ModelIdentity = tuple[ModelType, ModelTask] @@ -87,19 +90,6 @@ class DetectedFace(TypedDict): FacialRecognitionOutput = list[DetectedFace] -class OCROutput(TypedDict): - text: str - confidence: float - x1: int - y1: int - x2: int - y2: int - x3: int - y3: int - x4: int - y4: int - - class PipelineEntry(TypedDict): modelName: str options: dict[str, Any] diff --git a/machine-learning/pyproject.toml b/machine-learning/pyproject.toml index aba08578a9..4e70163711 100644 --- a/machine-learning/pyproject.toml +++ b/machine-learning/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "uvicorn[standard]>=0.22.0,<1.0", "paddleocr>=3.0.0", "setuptools>=78.1.0", + "rapidocr>=3.1.0", ] [dependency-groups] @@ -50,15 +51,11 @@ lint = [ dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }] [project.optional-dependencies] -cpu = ["onnxruntime>=1.15.0,<2", "paddlepaddle==3.0.0rc1"] -cuda = ["onnxruntime-gpu>=1.17.0,<2", "paddlepaddle-gpu==3.0.0rc1"] -openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0", "paddlepaddle==3.0.0rc1"] -armnn = ["onnxruntime>=1.15.0,<2", "paddlepaddle==3.0.0rc1"] -rknn = [ - "onnxruntime>=1.15.0,<2", - "rknn-toolkit-lite2>=2.3.0,<3", - "paddlepaddle==3.0.0rc1", -] +cpu = ["onnxruntime>=1.15.0,<2"] +cuda = ["onnxruntime-gpu>=1.17.0,<2"] +openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"] +armnn = ["onnxruntime>=1.15.0,<2"] +rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"] rocm = [] [tool.uv] diff --git a/machine-learning/uv.lock b/machine-learning/uv.lock index 87c25f3f20..c89a53b687 100644 --- a/machine-learning/uv.lock +++ b/machine-learning/uv.lock @@ -71,6 +71,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034 } + [[package]] name = "anyio" version = "4.2.0" @@ -86,15 +92,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/cd/d6d9bb1dadf73e7af02d18225cbd2c93f8552e13130484f1c8dcfece292b/anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee", size = 85481 }, ] -[[package]] -name = "astor" -version = "0.8.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/21/75b771132fee241dfe601d39ade629548a9626d1d39f333fde31bc46febe/astor-0.8.1.tar.gz", hash = "sha256:6a6effda93f4e1ce9f618779b2dd1d9d84f1e32812c23a29b3fff6fd7f63fa5e", size = 35090 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/88/97eef84f48fa04fbd6750e62dcceafba6c63c81b7ac1420856c8dcc0a3f9/astor-0.8.1-py2.py3-none-any.whl", hash = "sha256:070a54e890cefb5b3739d19f30f5a5ec840ffc9c50ffa7d23cc9fc1a38ebbfc5", size = 27488 }, -] - [[package]] name = "backports-asyncio-runner" version = "1.2.0" @@ -671,15 +668,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/7f/f584f5d15323feb897d42ef0e9d910649e2150d7a30cf7e7a8cc1d236e6f/Cython-3.0.8-py2.py3-none-any.whl", hash = "sha256:171b27051253d3f9108e9759e504ba59ff06e7f7ba944457f94deaf9c21bf0b6", size = 1168213 }, ] -[[package]] -name = "decorator" -version = "5.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190 }, -] - [[package]] name = "easydict" version = "1.11" @@ -1168,6 +1156,7 @@ dependencies = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "python-multipart" }, + { name = "rapidocr" }, { name = "rich" }, { name = "setuptools" }, { name = "tokenizers" }, @@ -1177,23 +1166,18 @@ dependencies = [ [package.optional-dependencies] armnn = [ { name = "onnxruntime" }, - { name = "paddlepaddle" }, ] cpu = [ { name = "onnxruntime" }, - { name = "paddlepaddle" }, ] cuda = [ { name = "onnxruntime-gpu" }, - { name = "paddlepaddle-gpu" }, ] openvino = [ { name = "onnxruntime-openvino" }, - { name = "paddlepaddle" }, ] rknn = [ { name = "onnxruntime" }, - { name = "paddlepaddle" }, { name = "rknn-toolkit-lite2" }, ] @@ -1256,15 +1240,11 @@ requires-dist = [ { name = "opencv-python-headless", specifier = ">=4.7.0.72,<5.0" }, { name = "orjson", specifier = ">=3.9.5" }, { name = "paddleocr", specifier = ">=3.0.0" }, - { name = "paddlepaddle", marker = "extra == 'armnn'", specifier = "==3.0.0rc1", index = "https://www.paddlepaddle.org.cn/packages/stable/cpu/" }, - { name = "paddlepaddle", marker = "extra == 'cpu'", specifier = "==3.0.0rc1", index = "https://www.paddlepaddle.org.cn/packages/stable/cpu/" }, - { name = "paddlepaddle", marker = "extra == 'openvino'", specifier = "==3.0.0rc1", index = "https://www.paddlepaddle.org.cn/packages/stable/cpu/" }, - { name = "paddlepaddle", marker = "extra == 'rknn'", specifier = "==3.0.0rc1", index = "https://www.paddlepaddle.org.cn/packages/stable/cpu/" }, - { name = "paddlepaddle-gpu", marker = "extra == 'cuda'", specifier = "==3.0.0rc1", index = "https://www.paddlepaddle.org.cn/packages/stable/cu118/" }, { name = "pillow", specifier = ">=9.5.0,<11.0" }, { name = "pydantic", specifier = ">=2.0.0,<3" }, { name = "pydantic-settings", specifier = ">=2.5.2,<3" }, { name = "python-multipart", specifier = ">=0.0.6,<1.0" }, + { name = "rapidocr", specifier = ">=3.1.0" }, { name = "rich", specifier = ">=13.4.2" }, { name = "rknn-toolkit-lite2", marker = "extra == 'rknn'", specifier = ">=2.3.0,<3" }, { name = "setuptools", specifier = ">=78.1.0" }, @@ -1836,107 +1816,16 @@ wheels = [ ] [[package]] -name = "nvidia-cublas-cu11" -version = "11.11.3.6" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/be/c222e33e60d28ecd496a46fc4d78ccae0ee28e1fd7dc705b6288b4cad27e/nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux1_x86_64.whl", hash = "sha256:39fb40e8f486dd8a2ddb8fdeefe1d5b28f5b99df01c87ab3676f057a74a5a6f3", size = 417870452 }, - { url = "https://files.pythonhosted.org/packages/ea/2e/9d99c60771d275ecf6c914a612e9a577f740a615bc826bec132368e1d3ae/nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux2014_x86_64.whl", hash = "sha256:60252822adea5d0b10cd990a7dc7bedf7435f30ae40083c7a624a85a43225abc", size = 417870460 }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu11" -version = "11.8.87" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/c9/b4b15f709a694ea9f84871c6c4fbeeb54bab225962d852665a2c6f77f90d/nvidia_cuda_cupti_cu11-11.8.87-py3-none-manylinux1_x86_64.whl", hash = "sha256:0e50c707df56c75a2c0703dc6b886f3c97a22f37d6f63839f75b7418ba672a8d", size = 13093657 }, - { url = "https://files.pythonhosted.org/packages/74/42/9f5c5cc084ce6f3073048c4f6806f45ba4c8c73f227c9587215d9c372e05/nvidia_cuda_cupti_cu11-11.8.87-py3-none-manylinux2014_x86_64.whl", hash = "sha256:4191a17913a706b5098681280cd089cd7d8d3df209a6f5cb79384974a96d24f2", size = 13093662 }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu11" -version = "11.8.89" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/08/a9833e4e9f9165bedb7f36033b47aa399b053b9cb2eaf7b84d1e28705cf7/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl", hash = "sha256:1f27d67b0f72902e9065ae568b4f6268dfe49ba3ed269c9a3da99bb86d1d2008", size = 23173264 }, - { url = "https://files.pythonhosted.org/packages/60/44/202e027c224c26e15a53f01c5c7604c7f6b4fd368882d3164ea08fead207/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a8d02f3cba345be56b1ffc3e74d8f61f02bb758dd31b0f20e12277a5a244f756", size = 23173745 }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu11" -version = "11.8.89" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/3e/84db02be49fe6d6df6e42f69fd64501c22d0f9ada9c9877f885612085d20/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.whl", hash = "sha256:f587bd726eb2f7612cf77ce38a2c1e65cf23251ff49437f6161ce0d647f64f7c", size = 875585 }, - { url = "https://files.pythonhosted.org/packages/a6/ec/a540f28b31de7bc1ed49eecc72035d4cb77db88ead1d42f7bfa5ae407ac6/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux2014_x86_64.whl", hash = "sha256:92d04069a987e1fbc9213f8376d265df0f7bb42617d44f5eda1f496acea7f2d1", size = 875592 }, -] - -[[package]] -name = "nvidia-cudnn-cu11" -version = "8.9.6.50" +name = "omegaconf" +version = "2.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu11", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, - { name = "nvidia-cuda-nvrtc-cu11", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120 } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/2d/3f083fcff1c302119f48e7b30a5f7b23db793f262f900943a9eb456b9e4d/nvidia_cudnn_cu11-8.9.6.50-py3-none-manylinux1_x86_64.whl", hash = "sha256:319a8f7ca3d65139f1b69998595c7076ae0e4271a325e5dfde50a3ca31f55584", size = 699874407 }, -] - -[[package]] -name = "nvidia-cufft-cu11" -version = "10.9.0.58" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/74/79/b912a77e38e41f15a0581a59f5c3548d1ddfdda3225936fb67c342719e7a/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl", hash = "sha256:222f9da70c80384632fd6035e4c3f16762d64ea7a843829cb278f98b3cb7dd81", size = 168405414 }, - { url = "https://files.pythonhosted.org/packages/64/c8/133717b43182ba063803e983e7680a94826a9f4ff5734af0ca315803f1b3/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e21037259995243cc370dd63c430d77ae9280bedb68d5b5a18226bfc92e5d748", size = 168405419 }, -] - -[[package]] -name = "nvidia-curand-cu11" -version = "10.3.0.86" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/28/c47f8e2439ddbcbeae3cf74d43ed572b651d630ea72863d5357f3759eb66/nvidia_curand_cu11-10.3.0.86-py3-none-manylinux1_x86_64.whl", hash = "sha256:ac439548c88580269a1eb6aeb602a5aed32f0dbb20809a31d9ed7d01d77f6bf5", size = 58124493 }, - { url = "https://files.pythonhosted.org/packages/58/e5/ce5806afc48a6e4e0dddd25316ac60b6fa94fd1791bdbf4ca17bf52696ea/nvidia_curand_cu11-10.3.0.86-py3-none-manylinux2014_x86_64.whl", hash = "sha256:cd4cffbf78bb06580206b4814d5dc696d1161c902aae37b2bba00056832379e6", size = 58124497 }, -] - -[[package]] -name = "nvidia-cusolver-cu11" -version = "11.4.1.48" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu11", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/ee/939ff0104991dd7bdabb4c9767994c612ba0e1c9a55672a1ddd42f5e5b16/nvidia_cusolver_cu11-11.4.1.48-py3-none-manylinux1_x86_64.whl", hash = "sha256:ca538f545645b7e6629140786d3127fe067b3d5a085bd794cde5bfe877c8926f", size = 128240842 }, - { url = "https://files.pythonhosted.org/packages/52/fe/866e87e6e6a1b0a5fcf8524a058042656702f2057e22bfdb8899a7c38e10/nvidia_cusolver_cu11-11.4.1.48-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea9fb1ad8c644ca9ed55af13cc39af3b7ba4c3eb5aef18471fe1fe77d94383cb", size = 128246438 }, -] - -[[package]] -name = "nvidia-cusparse-cu11" -version = "11.7.5.86" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/e0/21b829c535d569831835a4ca5d049a19ba00d3e91f3e12ab4ad27bd7385f/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux1_x86_64.whl", hash = "sha256:4ae709fe78d3f23f60acaba8c54b8ad556cf16ca486e0cc1aa92dca7555d2d2b", size = 204126221 }, - { url = "https://files.pythonhosted.org/packages/ed/5c/b0333b07c51ced77397c2fb0d9826072cea0da9d421aa7e792aa0f8ecc72/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8d7cf1628fd8d462b5d2ba6678fae34733a48ecb80495b9c68672ec6a6dde5ef", size = 204126227 }, -] - -[[package]] -name = "nvidia-nccl-cu11" -version = "2.19.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/7d/cc3dbf36c5af39b042d508b7a441ada1fce69bd18c800e5c25dc4e9f8933/nvidia_nccl_cu11-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:7c58afbeddf7f7c6b7dd7d84a7f4e85462610ee0c656287388b96d89dcf046d5", size = 135288005 }, -] - -[[package]] -name = "nvidia-nvtx-cu11" -version = "11.8.86" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/a2/23214c23118784dc2189ac2d2e48190df3e4206e2f73eb17d47140797a2b/nvidia_nvtx_cu11-11.8.86-py3-none-manylinux1_x86_64.whl", hash = "sha256:890656d8bd9b4e280231c832e1f0d03459200ba4824ddda3dcb59b1e1989b9f5", size = 99125 }, - { url = "https://files.pythonhosted.org/packages/b5/ad/973a187b137a3d45dc3faac421ef1275fb41fc169fd3889e2d5ceb0daa54/nvidia_nvtx_cu11-11.8.86-py3-none-manylinux2014_x86_64.whl", hash = "sha256:979f5b2aef5da164c5c53c64c85c3dfa61b8b4704f4f963bb568bf98fa8472e8", size = 99130 }, + { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500 }, ] [[package]] @@ -2059,6 +1948,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/9e/7110d2c5d543ab03b9581dbb1f8e2429863e44e0c9b4960b766f230c1279/opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:47ec3160dae75f70e099b286d1a2e086d20dac8b06e759f60eaf867e6bdecba7", size = 45541421 }, ] +[[package]] +name = "opencv-python" +version = "4.11.0.86" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/06/68c27a523103dad5837dc5b87e71285280c4f098c60e4fe8a8db6486ab09/opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4", size = 95171956 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/4d/53b30a2a3ac1f75f65a59eb29cf2ee7207ce64867db47036ad61743d5a23/opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a", size = 37326322 }, + { url = "https://files.pythonhosted.org/packages/3b/84/0a67490741867eacdfa37bc18df96e08a9d579583b419010d7f3da8ff503/opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:9d05ef13d23fe97f575153558653e2d6e87103995d54e6a35db3f282fe1f9c66", size = 56723197 }, + { url = "https://files.pythonhosted.org/packages/f3/bd/29c126788da65c1fb2b5fb621b7fed0ed5f9122aa22a0868c5e2c15c6d23/opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b92ae2c8852208817e6776ba1ea0d6b1e0a1b5431e971a2a0ddd2a8cc398202", size = 42230439 }, + { url = "https://files.pythonhosted.org/packages/2c/8b/90eb44a40476fa0e71e05a0283947cfd74a5d36121a11d926ad6f3193cc4/opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b02611523803495003bd87362db3e1d2a0454a6a63025dc6658a9830570aa0d", size = 62986597 }, + { url = "https://files.pythonhosted.org/packages/fb/d7/1d5941a9dde095468b288d989ff6539dd69cd429dbf1b9e839013d21b6f0/opencv_python-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:810549cb2a4aedaa84ad9a1c92fbfdfc14090e2749cedf2c1589ad8359aa169b", size = 29384337 }, + { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 }, +] + [[package]] name = "opencv-python-headless" version = "4.11.0.86" @@ -2076,18 +1982,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/8a/69176a64335aed183529207ba8bc3d329c2999d852b4f3818027203f50e6/opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:6c304df9caa7a6a5710b91709dd4786bf20a74d57672b3c31f7033cc638174ca", size = 39402386 }, ] -[[package]] -name = "opt-einsum" -version = "3.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7d/bf/9257e53a0e7715bc1127e15063e831f076723c6cd60985333a1c18878fb8/opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549", size = 73951 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/19/404708a7e54ad2798907210462fd950c3442ea51acc8790f3da48d2bee8b/opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147", size = 65486 }, -] - [[package]] name = "orjson" version = "3.11.3" @@ -2188,74 +2082,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/72/c8218cc7489762ab3d1fb25721f8c928f10085e38feb8d52fd6583bfc592/paddleocr-3.2.0-py3-none-any.whl", hash = "sha256:2b942295ad5963de8e01d68afb15a9507d713bc7299e2dfeb198d9c3ac5cf76f", size = 75976 }, ] -[[package]] -name = "paddlepaddle" -version = "3.0.0rc1" -source = { registry = "https://www.paddlepaddle.org.cn/packages/stable/cpu/" } -dependencies = [ - { name = "astor" }, - { name = "decorator" }, - { name = "httpx" }, - { name = "networkx" }, - { name = "numpy" }, - { name = "opt-einsum" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "typing-extensions" }, -] -wheels = [ - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp310-cp310-linux_aarch64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp310-cp310-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp310-cp310-macosx_10_9_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp310-cp310-macosx_11_0_arm64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp310-cp310-win_amd64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp311-cp311-linux_aarch64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp311-cp311-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp311-cp311-macosx_10_9_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp311-cp311-macosx_11_0_arm64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp311-cp311-win_amd64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp312-cp312-linux_aarch64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp312-cp312-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp312-cp312-macosx_10_9_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp312-cp312-macosx_11_0_arm64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.0.0rc1-cp312-cp312-win_amd64.whl" }, -] - -[[package]] -name = "paddlepaddle-gpu" -version = "3.0.0rc1" -source = { registry = "https://www.paddlepaddle.org.cn/packages/stable/cu118/" } -dependencies = [ - { name = "astor" }, - { name = "decorator" }, - { name = "httpx" }, - { name = "networkx" }, - { name = "numpy" }, - { name = "nvidia-cublas-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu11", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "opt-einsum" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "typing-extensions" }, -] -wheels = [ - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp310-cp310-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp310-cp310-win_amd64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp311-cp311-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp311-cp311-win_amd64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp312-cp312-linux_x86_64.whl" }, - { url = "https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0rc1-cp312-cp312-win_amd64.whl" }, -] - [[package]] name = "paddlex" version = "3.2.1" @@ -2967,6 +2793,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/a1/a5f4bebaa31d109003909809d88aeb0d4b201463a9ea29308d9e4f9e7655/qudida-0.0.4-py3-none-any.whl", hash = "sha256:4519714c40cd0f2e6c51e1735edae8f8b19f4efe1f33be13e9d644ca5f736dd6", size = 3478 }, ] +[[package]] +name = "rapidocr" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorlog" }, + { name = "numpy" }, + { name = "omegaconf" }, + { name = "opencv-python" }, + { name = "pillow" }, + { name = "pyclipper" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "shapely" }, + { name = "six" }, + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/e4/09ec2657421f1f23eec6b40ecbee77bbf3ff053c1483d8f2ed62d285bcf3/rapidocr-3.4.0-py3-none-any.whl", hash = "sha256:08d72f4c3a566bc76ac5c8d65d1e1c39550222b3b41b73aef976914ce80f48db", size = 15055924 }, +] + [[package]] name = "requests" version = "2.32.3"