refactor(ocr): update the OCR schema and response structure to store the four corner points of each text region (x1, y1 … x4, y4) instead of a two-corner bounding box, and adjust the related service and repository files

This commit is contained in:
CoderKang 2025-06-02 22:26:49 +08:00 committed by mertalev
parent 46ef02342d
commit df36a09cd3
No known key found for this signature in database
GPG key ID: DF6ABC77AAD98C95
7 changed files with 66 additions and 34 deletions

View file

@ -34,13 +34,17 @@ class PaddleOCRecognizer(InferenceModel):
valid_texts_and_scores = [ valid_texts_and_scores = [
(text, score, box) (text, score, box)
for result in results for result in results
for text, score, box in zip(result['rec_texts'], result['rec_scores'], result['rec_boxes'].tolist()) for text, score, box in zip(result['rec_texts'], result['rec_scores'], result['rec_polys'])
if score >= self.min_score if score >= self.min_score
] ]
if not valid_texts_and_scores: if not valid_texts_and_scores:
return [] return []
return [ return [
OCROutput(text=text, confidence=score, boundingBox={"x1": box[0], "y1": box[1], "x2": box[2], "y2": box[3]}) OCROutput(
text=text, confidence=score,
x1=box[0][0], y1=box[0][1], x2=box[1][0], y2=box[1][1],
x3=box[2][0], y3=box[2][1], x4=box[3][0], y4=box[3][1]
)
for text, score, box in valid_texts_and_scores for text, score, box in valid_texts_and_scores
] ]

View file

@ -90,7 +90,14 @@ FacialRecognitionOutput = list[DetectedFace]
class OCROutput(TypedDict): class OCROutput(TypedDict):
text: str text: str
confidence: float confidence: float
boundingBox: BoundingBox x1: int
y1: int
x2: int
y2: int
x3: int
y3: int
x4: int
y4: int
class PipelineEntry(TypedDict): class PipelineEntry(TypedDict):

View file

@ -40,13 +40,19 @@ export type ClipTextualRequest = { [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: Mo
export type ClipTextualResponse = { [ModelTask.SEARCH]: string }; export type ClipTextualResponse = { [ModelTask.SEARCH]: string };
export type OCR = { export type OCR = {
boundingBox: BoundingBox; x1: number;
y1: number;
x2: number;
y2: number;
x3: number;
y3: number;
x4: number;
y4: number;
text: string; text: string;
confidence: number;
}; };
export type OcrRequest = { [ModelTask.OCR]: { [ModelType.OCR]: ModelOptions & { options: { minScore: number } } } }; export type OcrRequest = { [ModelTask.OCR]: { [ModelType.OCR]: ModelOptions & { options: { minScore: number } } } };
export type OcrResponse = { [ModelTask.OCR]: OCR | OCR[] } & VisualResponse; export type OcrResponse = { [ModelTask.OCR]: OCR[] } & VisualResponse;
export type FacialRecognitionRequest = { export type FacialRecognitionRequest = {
[ModelTask.FACIAL_RECOGNITION]: { [ModelTask.FACIAL_RECOGNITION]: {

View file

@ -6,10 +6,14 @@ import { DummyValue, GenerateSql } from 'src/decorators';
export interface OcrInsertData { export interface OcrInsertData {
assetId: string; assetId: string;
boundingBoxX1: number; x1: number;
boundingBoxY1: number; y1: number;
boundingBoxX2: number; x2: number;
boundingBoxY2: number; y2: number;
x3: number;
y3: number;
x4: number;
y4: number;
text: string; text: string;
} }

View file

@ -1,12 +1,14 @@
import { Kysely, sql } from 'kysely'; import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> { export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "boundingBoxX1" integer NOT NULL DEFAULT 0, "boundingBoxY1" integer NOT NULL DEFAULT 0, "boundingBoxX2" integer NOT NULL DEFAULT 0, "boundingBoxY2" integer NOT NULL DEFAULT 0, "text" text NOT NULL);`.execute(db); await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" integer NOT NULL, "y1" integer NOT NULL, "x2" integer NOT NULL, "y2" integer NOT NULL, "x3" integer NOT NULL, "y3" integer NOT NULL, "x4" integer NOT NULL, "y4" integer NOT NULL, "text" text NOT NULL);`.execute(db);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22" PRIMARY KEY ("id");`.execute(db); await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22" PRIMARY KEY ("id");`.execute(db);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "FK_dc592ec504976f5636e28bb84c6" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db); await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "FK_dc592ec504976f5636e28bb84c6" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db);
await sql`CREATE INDEX "IDX_dc592ec504976f5636e28bb84c" ON "asset_ocr" ("assetId")`.execute(db);
} }
export async function down(db: Kysely<any>): Promise<void> { export async function down(db: Kysely<any>): Promise<void> {
await sql`DROP INDEX "IDX_dc592ec504976f5636e28bb84c";`.execute(db);
await sql`ALTER TABLE "asset_ocr" DROP CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22";`.execute(db); await sql`ALTER TABLE "asset_ocr" DROP CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22";`.execute(db);
await sql`ALTER TABLE "asset_ocr" DROP CONSTRAINT "FK_dc592ec504976f5636e28bb84c6";`.execute(db); await sql`ALTER TABLE "asset_ocr" DROP CONSTRAINT "FK_dc592ec504976f5636e28bb84c6";`.execute(db);
await sql`DROP TABLE "asset_ocr";`.execute(db); await sql`DROP TABLE "asset_ocr";`.execute(db);

View file

@ -9,21 +9,33 @@ export class AssetOcrTable {
@ForeignKeyColumn(() => AssetTable, { @ForeignKeyColumn(() => AssetTable, {
onDelete: 'CASCADE', onDelete: 'CASCADE',
onUpdate: 'CASCADE', onUpdate: 'CASCADE',
index: false, index: true,
}) })
assetId!: string; assetId!: string;
@Column({ default: 0, type: 'integer' }) @Column({ type: 'integer' })
boundingBoxX1!: number; x1!: number;
@Column({ default: 0, type: 'integer' }) @Column({ type: 'integer' })
boundingBoxY1!: number; y1!: number;
@Column({ default: 0, type: 'integer' }) @Column({ type: 'integer' })
boundingBoxX2!: number; x2!: number;
@Column({ default: 0, type: 'integer' }) @Column({ type: 'integer' })
boundingBoxY2!: number; y2!: number;
@Column({ type: 'integer' })
x3!: number;
@Column({ type: 'integer' })
y3!: number;
@Column({ type: 'integer' })
x4!: number;
@Column({ type: 'integer' })
y4!: number;
@Column({ type: 'text' }) @Column({ type: 'text' })
text!: string; text!: string;

View file

@ -66,14 +66,7 @@ export class OcrService extends BaseService {
machineLearning.ocr machineLearning.ocr
); );
const resultsArray = Array.isArray(ocrResults) ? ocrResults : [ocrResults]; if (ocrResults.length === 0) {
const validResults = resultsArray.filter(result =>
result &&
result.text &&
result.text.trim().length > 0
);
if (validResults.length === 0) {
this.logger.warn(`No valid OCR results for document ${id}`); this.logger.warn(`No valid OCR results for document ${id}`);
await this.assetRepository.upsertJobStatus({ await this.assetRepository.upsertJobStatus({
assetId: asset.id, assetId: asset.id,
@ -83,12 +76,16 @@ export class OcrService extends BaseService {
} }
try { try {
const ocrDataList = validResults.map(result => ({ const ocrDataList = ocrResults.map(result => ({
assetId: id, assetId: id,
boundingBoxX1: result.boundingBox.x1, x1: result.x1,
boundingBoxY1: result.boundingBox.y1, y1: result.y1,
boundingBoxX2: result.boundingBox.x2, x2: result.x2,
boundingBoxY2: result.boundingBox.y2, y2: result.y2,
x3: result.x3,
y3: result.y3,
x4: result.x4,
y4: result.y4,
text: result.text.trim(), text: result.text.trim(),
})); }));
@ -99,7 +96,7 @@ export class OcrService extends BaseService {
ocrAt: new Date(), ocrAt: new Date(),
}); });
this.logger.debug(`Processed ${validResults.length} OCR result(s) for ${id}`); this.logger.debug(`Processed ${ocrResults.length} OCR result(s) for ${id}`);
return JobStatus.SUCCESS; return JobStatus.SUCCESS;
} catch (error) { } catch (error) {
this.logger.error(`Failed to insert OCR results for ${id}:`, error); this.logger.error(`Failed to insert OCR results for ${id}:`, error);