mirror of
https://github.com/immich-app/immich
synced 2025-11-07 17:27:20 +00:00
feat: enhance OCR configuration and functionality
- Updated OCR settings to include minimum detection box score, minimum detection score, and minimum recognition score.
- Refactored PaddleOCRecognizer to utilize new scoring parameters.
- Introduced new database tables for asset OCR data and search functionality.
- Modified related services and repositories to support the new OCR features.
- Updated translations for improved clarity in settings UI.
This commit is contained in:
parent
df36a09cd3
commit
4d8e51ede6
17 changed files with 180 additions and 51 deletions
|
|
@ -12,10 +12,9 @@ class PaddleOCRecognizer(InferenceModel):
|
|||
depends = []
|
||||
identity = (ModelType.OCR, ModelTask.OCR)
|
||||
|
||||
def __init__(self, model_name: str, min_score: float = 0.9, **model_kwargs: Any) -> None:
|
||||
self.min_score = model_kwargs.pop("minScore", min_score)
|
||||
self.orientation_classify_enabled = model_kwargs.pop("orientationClassifyEnabled", True)
|
||||
self.unwarping_enabled = model_kwargs.pop("unwarpingEnabled", True)
|
||||
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
||||
self.orientation_classify_enabled = model_kwargs.get("orientationClassifyEnabled", False)
|
||||
self.unwarping_enabled = model_kwargs.get("unwarpingEnabled", False)
|
||||
super().__init__(model_name, **model_kwargs)
|
||||
self._load()
|
||||
self.loaded = True
|
||||
|
|
@ -28,23 +27,25 @@ class PaddleOCRecognizer(InferenceModel):
|
|||
use_doc_unwarping=self.unwarping_enabled,
|
||||
)
|
||||
|
||||
def configure(self, **kwargs: Any) -> None:
|
||||
self.min_detection_score = kwargs.get("minDetectionScore", 0.3)
|
||||
self.min_detection_box_score = kwargs.get("minDetectionBoxScore", 0.6)
|
||||
self.min_recognition_score = kwargs.get("minRecognitionScore", 0.0)
|
||||
|
||||
def _predict(self, inputs: NDArray[np.uint8] | bytes | Image.Image, **kwargs: Any) -> List[OCROutput]:
|
||||
inputs = decode_cv2(inputs)
|
||||
results = self.model.predict(inputs)
|
||||
valid_texts_and_scores = [
|
||||
(text, score, box)
|
||||
for result in results
|
||||
for text, score, box in zip(result['rec_texts'], result['rec_scores'], result['rec_polys'])
|
||||
if score >= self.min_score
|
||||
]
|
||||
if not valid_texts_and_scores:
|
||||
return []
|
||||
|
||||
results = self.model.predict(
|
||||
inputs,
|
||||
text_det_thresh=self.min_detection_score,
|
||||
text_det_box_thresh=self.min_detection_box_score,
|
||||
text_rec_score_thresh=self.min_recognition_score
|
||||
)
|
||||
return [
|
||||
OCROutput(
|
||||
text=text, confidence=score,
|
||||
x1=box[0][0], y1=box[0][1], x2=box[1][0], y2=box[1][1],
|
||||
x3=box[2][0], y3=box[2][1], x4=box[3][0], y4=box[3][1]
|
||||
)
|
||||
for text, score, box in valid_texts_and_scores
|
||||
for result in results
|
||||
for text, score, box in zip(result['rec_texts'], result['rec_scores'], result['rec_polys'])
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue