diff --git a/server/src/controllers/search.controller.ts b/server/src/controllers/search.controller.ts index 777525755f..161d9b278a 100644 --- a/server/src/controllers/search.controller.ts +++ b/server/src/controllers/search.controller.ts @@ -6,7 +6,6 @@ import { PersonResponseDto } from 'src/dtos/person.dto'; import { LargeAssetSearchDto, MetadataSearchDto, - OcrSearchDto, PlacesResponseDto, RandomSearchDto, SearchExploreResponseDto, @@ -16,7 +15,7 @@ import { SearchStatisticsResponseDto, SearchSuggestionRequestDto, SmartSearchDto, - StatisticsSearchDto, + StatisticsSearchDto } from 'src/dtos/search.dto'; import { Permission } from 'src/enum'; import { Auth, Authenticated } from 'src/middleware/auth.guard'; @@ -62,13 +61,6 @@ export class SearchController { return this.service.searchSmart(auth, dto); } - @Post('ocr') - @HttpCode(HttpStatus.OK) - @Authenticated() - searchOcr(@Auth() auth: AuthDto, @Body() dto: OcrSearchDto): Promise { - return this.service.searchOcr(auth, dto); - } - @Get('explore') @Authenticated({ permission: Permission.AssetRead }) getExploreData(@Auth() auth: AuthDto): Promise { diff --git a/server/src/queries/asset.job.repository.sql b/server/src/queries/asset.job.repository.sql index 471de1ac34..cede68cd7d 100644 --- a/server/src/queries/asset.job.repository.sql +++ b/server/src/queries/asset.job.repository.sql @@ -285,6 +285,25 @@ from where "asset"."id" = $2 +-- AssetJobRepository.getForOcr +select + "assets"."visibility", + ( + select + "asset_files"."id", + "asset_files"."path", + "asset_files"."type" + from + "asset_files" + where + "asset_files"."assetId" = "assets"."id" + and "asset_files"."type" = $1 + ) as "previewFile" +from + "assets" +where + "assets"."id" = $2 + -- AssetJobRepository.getForSyncAssets select "asset"."id", @@ -483,6 +502,17 @@ where order by "asset"."fileCreatedAt" desc +-- AssetJobRepository.streamForOcrJob +select + "assets"."id" +from + "assets" + inner join "asset_job_status" on "asset_job_status"."assetId" = "assets"."id" +where + "asset_job_status"."ocrAt" is null + and "assets"."deletedAt" is null + and "assets"."visibility" != $1 + -- AssetJobRepository.streamForMigrationJob select "id" diff --git a/server/src/queries/ocr.repository.sql b/server/src/queries/ocr.repository.sql new file mode 100644 index 0000000000..566997f1ae --- /dev/null +++ b/server/src/queries/ocr.repository.sql @@ -0,0 +1,54 @@ +-- NOTE: This file is auto generated by ./sql-generator + +-- OcrRepository.getById +select + "asset_ocr".* +from + "asset_ocr" +where + "asset_ocr"."id" = $1 + +-- OcrRepository.getByAssetId +select + "asset_ocr".* +from + "asset_ocr" +where + "asset_ocr"."assetId" = $1 + +-- OcrRepository.upsert +with + "deleted_ocr" as ( + delete from "asset_ocr" + where + "assetId" = $1 + ), + "inserted_ocr" as ( + insert into + "asset_ocr" ( + "assetId", + "x1", + "y1", + "x2", + "y2", + "x3", + "y3", + "x4", + "y4", + "text", + "confidence" + ) + values + ($2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) + ), + "inserted_search" as ( + insert into + "ocr_search" ("assetId", "text") + values + ($13, $14) + on conflict ("assetId") do update + set + "text" = "excluded"."text" + ) +select + 1 as "dummy" diff --git a/server/src/repositories/asset-job.repository.ts b/server/src/repositories/asset-job.repository.ts index 0e545e12b1..18827aeb07 100644 --- a/server/src/repositories/asset-job.repository.ts +++ b/server/src/repositories/asset-job.repository.ts @@ -16,6 +16,7 @@ import { withExifInner, withFaces, withFacesAndPeople, + withFile, withFiles, } from 'src/utils/database'; @@ -192,6 +193,15 @@ export class AssetJobRepository { .executeTakeFirst(); } + @GenerateSql({ params: [DummyValue.UUID] }) + getForOcr(id: string) { + return this.db + .selectFrom('assets') + .select((eb) => ['assets.visibility', withFile(eb, AssetFileType.PREVIEW).as('previewFile')]) + .where('assets.id', '=', id) + .executeTakeFirst(); + } + @GenerateSql({ params: [[DummyValue.UUID]] }) getForSyncAssets(ids: string[]) { return this.db @@ -351,16 +361,16 @@ export class AssetJobRepository { @GenerateSql({ params: [], stream: true }) streamForOcrJob(force?: boolean) { return this.db - .selectFrom('assets') - .select(['assets.id']) - .$if(!force, (qb) => - qb - .innerJoin('asset_job_status', 'asset_job_status.assetId', 'assets.id') - .where('asset_job_status.ocrAt', 'is', null), - ) - .where('assets.deletedAt', 'is', null) - .where('assets.visibility', '!=', AssetVisibility.HIDDEN) - .stream(); + .selectFrom('assets') + .select(['assets.id']) + .$if(!force, (qb) => + qb + .innerJoin('asset_job_status', 'asset_job_status.assetId', 'assets.id') + .where('asset_job_status.ocrAt', 'is', null), + ) + .where('assets.deletedAt', 'is', null) + .where('assets.visibility', '!=', AssetVisibility.HIDDEN) + .stream(); } @GenerateSql({ params: [DummyValue.DATE], stream: true }) diff --git a/server/src/repositories/ocr.repository.ts b/server/src/repositories/ocr.repository.ts index a9fd594ed3..8a9548193b 100644 --- a/server/src/repositories/ocr.repository.ts +++ b/server/src/repositories/ocr.repository.ts @@ -1,10 +1,11 @@ import { Injectable } from '@nestjs/common'; -import { Kysely, QueryCreator, sql } from 'kysely'; +import { Kysely, sql } from 'kysely'; import { InjectKysely } from 'nestjs-kysely'; import { DB } from 'src/db'; import { DummyValue, GenerateSql } from 'src/decorators'; export interface OcrInsertData { + assetId: string; x1: number; y1: number; x2: number; @@ -22,57 +23,59 @@ export class OcrRepository { constructor(@InjectKysely() private db: Kysely) {} @GenerateSql({ params: [DummyValue.UUID] }) - async getById(id: string) { - return this.db - .selectFrom('asset_ocr') - .selectAll('asset_ocr') - .where('asset_ocr.assetId', '=', id) - .executeTakeFirst(); + getById(id: string) { + return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.id', '=', id).executeTakeFirst(); } - async deleteAll(): Promise { - await sql`truncate ${sql.table('asset_ocr')}`.execute(this.db); - await sql`truncate ${sql.table('ocr_search')}`.execute(this.db); + @GenerateSql({ params: [DummyValue.UUID] }) + getByAssetId(id: string) { + return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.assetId', '=', id).execute(); } - async upsert(assetId: string, ocrDataList: OcrInsertData[]): Promise { + deleteAll() { + return this.db.transaction().execute(async (trx: Kysely) => { + await sql`truncate ${sql.table('asset_ocr')}`.execute(trx); + await sql`truncate ${sql.table('ocr_search')}`.execute(trx); + }); + } + + @GenerateSql({ + params: [ + DummyValue.UUID, + [ + { + assetId: DummyValue.UUID, + x1: DummyValue.NUMBER, + y1: DummyValue.NUMBER, + x2: DummyValue.NUMBER, + y2: DummyValue.NUMBER, + x3: DummyValue.NUMBER, + y3: DummyValue.NUMBER, + x4: DummyValue.NUMBER, + y4: DummyValue.NUMBER, + text: DummyValue.STRING, + confidence: DummyValue.NUMBER, + }, + ], + ], + }) + upsert(assetId: string, ocrDataList: OcrInsertData[]) { if (ocrDataList.length === 0) { return; } - const assetOcrData = ocrDataList.map(item => ({ - assetId, - ...item, - })); + const searchText = ocrDataList.map((item) => item.text.trim()).join(' '); - const searchText = ocrDataList.map(item => item.text.trim()).join(''); - - await this.db.transaction().execute(async (trx: Kysely) => { - await trx - .with('deleted_ocr', (db: QueryCreator) => - db.deleteFrom('asset_ocr').where('assetId', '=', assetId).returningAll() - ) - .insertInto('asset_ocr') - .values(assetOcrData) - .execute(); - - if (searchText.trim()) { - await trx - .with('deleted_search', (db: QueryCreator) => - db.deleteFrom('ocr_search').where('assetId', '=', assetId).returningAll() - ) + return this.db + .with('deleted_ocr', (db) => db.deleteFrom('asset_ocr').where('assetId', '=', assetId)) + .with('inserted_ocr', (db) => db.insertInto('asset_ocr').values(ocrDataList)) + .with('inserted_search', (db) => + db .insertInto('ocr_search') - .values({ - assetId, - text: searchText, - }) - .execute(); - } else { - await trx - .deleteFrom('ocr_search') - .where('assetId', '=', assetId) - .execute(); - } - }); + .values({ assetId, text: searchText }) + .onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ text: eb.ref('excluded.text') }))), + ) + .selectNoFrom(sql`1`.as('dummy')) + .execute(); } } diff --git a/server/src/repositories/search.repository.ts b/server/src/repositories/search.repository.ts index 06898bc09e..0c6a9a5a4c 100644 --- a/server/src/repositories/search.repository.ts +++ b/server/src/repositories/search.repository.ts @@ -85,8 +85,7 @@ export interface SearchEmbeddingOptions { } export interface SearchOcrOptions { - ocr: string; - userIds: string[]; + ocr?: string; } export interface SearchPeopleOptions { @@ -119,7 +118,8 @@ type BaseAssetSearchOptions = SearchDateOptions & SearchUserIdOptions & SearchPeopleOptions & SearchTagOptions & - SearchAlbumOptions; + SearchAlbumOptions & + SearchOcrOptions; export type AssetSearchOptions = BaseAssetSearchOptions & SearchRelationOptions; @@ -132,7 +132,8 @@ export type SmartSearchOptions = SearchDateOptions & SearchStatusOptions & SearchUserIdOptions & SearchPeopleOptions & - SearchTagOptions; + SearchTagOptions & + SearchOcrOptions; export type OcrSearchOptions = SearchDateOptions & SearchOcrOptions; @@ -307,30 +308,6 @@ export class SearchRepository { return this.db.selectFrom('smart_search').selectAll().where('assetId', '=', assetId).executeTakeFirst(); } - @GenerateSql({ - params: [ - { page: 1, size: 100 }, - { - userIds: [DummyValue.UUID], - ocr: DummyValue.STRING, - }, - ], - }) - async searchOcr(pagination: SearchPaginationOptions, options: OcrSearchOptions) { - if (!isValidInteger(pagination.size, { min: 1, max: 1000 })) { - throw new Error(`Invalid value for 'size': ${pagination.size}`); - } - - const items = await searchAssetBuilder(this.db, options) - .innerJoin('ocr_search', 'assets.id', 'ocr_search.assetId') - .where('ocr_search.text', 'ilike', `%${options.ocr}%`) - .limit(pagination.size + 1) - .offset((pagination.page - 1) * pagination.size) - .execute(); - - return paginationHelper(items, pagination.size); - } - @GenerateSql({ params: [ { diff --git a/server/src/schema/migrations/1748926208942-CreateAssetOCRTable.ts b/server/src/schema/migrations/1748926208942-CreateAssetOCRTable.ts index 4bf4d61dd8..f968f17dc1 100644 --- a/server/src/schema/migrations/1748926208942-CreateAssetOCRTable.ts +++ b/server/src/schema/migrations/1748926208942-CreateAssetOCRTable.ts @@ -1,7 +1,9 @@ import { Kysely, sql } from 'kysely'; export async function up(db: Kysely): Promise { - await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" integer NOT NULL, "y1" integer NOT NULL, "x2" integer NOT NULL, "y2" integer NOT NULL, "x3" integer NOT NULL, "y3" integer NOT NULL, "x4" integer NOT NULL, "y4" integer NOT NULL, "text" text NOT NULL, "confidence" double precision NOT NULL);`.execute(db); + await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" integer NOT NULL, "y1" integer NOT NULL, "x2" integer NOT NULL, "y2" integer NOT NULL, "x3" integer NOT NULL, "y3" integer NOT NULL, "x4" integer NOT NULL, "y4" integer NOT NULL, "text" text NOT NULL, "confidence" real NOT NULL);`.execute( + db, + ); await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22" PRIMARY KEY ("id");`.execute(db); await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "FK_dc592ec504976f5636e28bb84c6" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db); await sql`CREATE INDEX "IDX_dc592ec504976f5636e28bb84c" ON "asset_ocr" ("assetId")`.execute(db); diff --git a/server/src/schema/migrations/1748929348618-CreateOCRSearchTable.ts b/server/src/schema/migrations/1748929348618-CreateOCRSearchTable.ts index 388779a2fd..4598ad0763 100644 --- a/server/src/schema/migrations/1748929348618-CreateOCRSearchTable.ts +++ b/server/src/schema/migrations/1748929348618-CreateOCRSearchTable.ts @@ -4,6 +4,9 @@ export async function up(db: Kysely): Promise { await sql`CREATE TABLE "ocr_search" ("assetId" uuid NOT NULL, "text" text NOT NULL);`.execute(db); await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "PK_a8299b7f08ef223f6d32f4482a7" PRIMARY KEY ("assetId");`.execute(db); await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "FK_a8299b7f08ef223f6d32f4482a7" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db); + await sql`CREATE INDEX "idx_ocr_search_text" ON "ocr_search" USING gin (f_unaccent("text") gin_trgm_ops);`.execute( + db, + ); } export async function down(db: Kysely): Promise { diff --git a/server/src/schema/tables/asset-ocr.table.ts b/server/src/schema/tables/asset-ocr.table.ts index e7244c2bfa..532f593230 100644 --- a/server/src/schema/tables/asset-ocr.table.ts +++ b/server/src/schema/tables/asset-ocr.table.ts @@ -40,6 +40,6 @@ export class AssetOcrTable { @Column({ type: 'text' }) text!: string; - @Column({ type: 'double precision' }) + @Column({ type: 'real' }) confidence!: number; } diff --git a/server/src/schema/tables/ocr-search.table.ts b/server/src/schema/tables/ocr-search.table.ts index e23fd963a9..4174891e63 100644 --- a/server/src/schema/tables/ocr-search.table.ts +++ b/server/src/schema/tables/ocr-search.table.ts @@ -1,7 +1,12 @@ import { AssetTable } from 'src/schema/tables/asset.table'; -import { Column, ForeignKeyColumn, Table } from 'src/sql-tools'; +import { Column, ForeignKeyColumn, Index, Table } from 'src/sql-tools'; @Table('ocr_search') +@Index({ + name: 'idx_ocr_search_text', + using: 'gin', + expression: 'f_unaccent("text") gin_trgm_ops', +}) export class OcrSearchTable { @ForeignKeyColumn(() => AssetTable, { onDelete: 'CASCADE', diff --git a/server/src/services/ocr.service.ts b/server/src/services/ocr.service.ts index e027fbf8cf..ce41b9500f 100644 --- a/server/src/services/ocr.service.ts +++ b/server/src/services/ocr.service.ts @@ -1,19 +1,13 @@ import { Injectable } from '@nestjs/common'; import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants'; import { OnJob } from 'src/decorators'; -import { - JobName, - JobStatus, - QueueName, -} from 'src/enum'; +import { AssetVisibility, JobName, JobStatus, QueueName } from 'src/enum'; import { BaseService } from 'src/services/base.service'; import { JobItem, JobOf } from 'src/types'; -import { getAssetFiles } from 'src/utils/asset.util'; import { isOcrEnabled } from 'src/utils/misc'; @Injectable() export class OcrService extends BaseService { - @OnJob({ name: JobName.QUEUE_OCR, queue: QueueName.OCR }) async handleQueueOcr({ force, nightly }: JobOf): Promise { const { machineLearning } = await this.getConfig({ withCache: false }); @@ -48,35 +42,24 @@ export class OcrService extends BaseService { return JobStatus.SKIPPED; } - const relations = { files: true }; - const asset = await this.assetRepository.getById(id, relations); - if (!asset) { + const asset = await this.assetJobRepository.getForOcr(id); + if (!asset || !asset.previewFile) { return JobStatus.FAILED; } - if (!asset.files) { - return JobStatus.FAILED; - } - const { previewFile } = getAssetFiles(asset.files); - if (!previewFile) { - return JobStatus.FAILED; + + if (asset.visibility === AssetVisibility.HIDDEN) { + return JobStatus.SKIPPED; } + const ocrResults = await this.machineLearningRepository.ocr( machineLearning.urls, - previewFile.path, - machineLearning.ocr + asset.previewFile, + machineLearning.ocr, ); - if (ocrResults.length === 0) { - this.logger.warn(`No valid OCR results for document ${id}`); - await this.assetRepository.upsertJobStatus({ - assetId: asset.id, - ocrAt: new Date(), - }); - return JobStatus.SUCCESS; - } - - try { - const ocrDataList = ocrResults.map(result => ({ + if (ocrResults.length > 0) { + const ocrDataList = ocrResults.map((result) => ({ + assetId: id, x1: result.x1, y1: result.y1, x2: result.x2, @@ -85,22 +68,16 @@ export class OcrService extends BaseService { y3: result.y3, x4: result.x4, y4: result.y4, - text: result.text.trim(), + text: result.text, confidence: result.confidence, })); await this.ocrRepository.upsert(id, ocrDataList); - await this.assetRepository.upsertJobStatus({ - assetId: asset.id, - ocrAt: new Date(), - }); - - this.logger.debug(`Processed ${ocrResults.length} OCR result(s) for ${id}`); - return JobStatus.SUCCESS; - } catch (error) { - this.logger.error(`Failed to insert OCR results for ${id}:`, error); - return JobStatus.FAILED; } - } -} \ No newline at end of file + await this.assetRepository.upsertJobStatus({ assetId: id, ocrAt: new Date() }); + + this.logger.debug(`Processed ${ocrResults.length} OCR result(s) for ${id}`); + return JobStatus.SUCCESS; + } +} diff --git a/server/src/services/search.service.ts b/server/src/services/search.service.ts index 5dd8c3eb60..51a2c94338 100644 --- a/server/src/services/search.service.ts +++ b/server/src/services/search.service.ts @@ -7,7 +7,6 @@ import { LargeAssetSearchDto, mapPlaces, MetadataSearchDto, - OcrSearchDto, PlacesResponseDto, RandomSearchDto, SearchPeopleDto, @@ -23,7 +22,7 @@ import { AssetOrder, AssetVisibility, Permission } from 'src/enum'; import { BaseService } from 'src/services/base.service'; import { requireElevatedPermission } from 'src/utils/access'; import { getMyPartnerIds } from 'src/utils/asset.util'; -import { isOcrEnabled, isSmartSearchEnabled } from 'src/utils/misc'; +import { isSmartSearchEnabled } from 'src/utils/misc'; @Injectable() export class SearchService extends BaseService { @@ -146,23 +145,6 @@ export class SearchService extends BaseService { return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null, { auth }); } - async searchOcr(auth: AuthDto, dto: OcrSearchDto): Promise { - const { machineLearning } = await this.getConfig({ withCache: false }); - if (!isOcrEnabled(machineLearning)) { - throw new BadRequestException('OCR is not enabled'); - } - - const userIds = await this.getUserIdsToSearch(auth); - const page = dto.page ?? 1; - const size = dto.size || 250; - const { items, hasNextPage } = await this.searchRepository.searchOcr( - { page, size }, - { ...dto, userIds }, - ); - - return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null, { auth }); - } - async getAssetsByCity(auth: AuthDto): Promise { const userIds = await this.getUserIdsToSearch(auth); const assets = await this.searchRepository.getAssetsByCity(userIds); diff --git a/server/src/sql-tools/types.ts b/server/src/sql-tools/types.ts index 9529067040..899ba1b963 100644 --- a/server/src/sql-tools/types.ts +++ b/server/src/sql-tools/types.ts @@ -322,7 +322,8 @@ export type ColumnType = | 'uuid' | 'vector' | 'enum' - | 'serial'; + | 'serial' + | 'real'; export type DatabaseSchema = { databaseName: string; diff --git a/server/src/utils/database.ts b/server/src/utils/database.ts index d9fe6b7897..36ad2a7003 100644 --- a/server/src/utils/database.ts +++ b/server/src/utils/database.ts @@ -200,6 +200,14 @@ export function withFiles(eb: ExpressionBuilder, type?: AssetFileTy ).as('files'); } +export function withFile(eb: ExpressionBuilder, type: AssetFileType) { + return eb + .selectFrom('asset_file') + .select(columns.assetFiles) + .whereRef('asset_file.assetId', '=', 'asset.id') + .where('asset_file.type', '=', type); +} + export function withFacesAndPeople(eb: ExpressionBuilder, withDeletedFace?: boolean) { return jsonArrayFrom( eb @@ -380,6 +388,11 @@ export function searchAssetBuilder(kysely: Kysely, options: AssetSearchBuild .innerJoin('asset_exif', 'asset.id', 'asset_exif.assetId') .where(sql`f_unaccent(asset_exif.description)`, 'ilike', sql`'%' || f_unaccent(${options.description}) || '%'`), ) + .$if(!!options.ocr, (qb) => + qb + .innerJoin('ocr_search', 'assets.id', 'ocr_search.assetId') + .where(() => sql`f_unaccent(ocr_search.text) %>> f_unaccent(${options.ocr!})`), + ) .$if(!!options.type, (qb) => qb.where('asset.type', '=', options.type!)) .$if(options.isFavorite !== undefined, (qb) => qb.where('asset.isFavorite', '=', options.isFavorite!)) .$if(options.isOffline !== undefined, (qb) => qb.where('asset.isOffline', '=', options.isOffline!))