sql changes

This commit is contained in:
mertalev 2025-06-09 13:31:31 -04:00
parent 4d8e51ede6
commit 08e54ec5c1
No known key found for this signature in database
GPG key ID: DF6ABC77AAD98C95
14 changed files with 204 additions and 155 deletions

View file

@ -6,7 +6,6 @@ import { PersonResponseDto } from 'src/dtos/person.dto';
import {
LargeAssetSearchDto,
MetadataSearchDto,
OcrSearchDto,
PlacesResponseDto,
RandomSearchDto,
SearchExploreResponseDto,
@ -16,7 +15,7 @@ import {
SearchStatisticsResponseDto,
SearchSuggestionRequestDto,
SmartSearchDto,
StatisticsSearchDto,
StatisticsSearchDto
} from 'src/dtos/search.dto';
import { Permission } from 'src/enum';
import { Auth, Authenticated } from 'src/middleware/auth.guard';
@ -62,13 +61,6 @@ export class SearchController {
return this.service.searchSmart(auth, dto);
}
@Post('ocr')
@HttpCode(HttpStatus.OK)
@Authenticated()
searchOcr(@Auth() auth: AuthDto, @Body() dto: OcrSearchDto): Promise<SearchResponseDto> {
return this.service.searchOcr(auth, dto);
}
@Get('explore')
@Authenticated({ permission: Permission.AssetRead })
getExploreData(@Auth() auth: AuthDto): Promise<SearchExploreResponseDto[]> {

View file

@ -285,6 +285,25 @@ from
where
"asset"."id" = $2
-- AssetJobRepository.getForOcr
select
"assets"."visibility",
(
select
"asset_files"."id",
"asset_files"."path",
"asset_files"."type"
from
"asset_files"
where
"asset_files"."assetId" = "assets"."id"
and "asset_files"."type" = $1
) as "previewFile"
from
"assets"
where
"assets"."id" = $2
-- AssetJobRepository.getForSyncAssets
select
"asset"."id",
@ -483,6 +502,17 @@ where
order by
"asset"."fileCreatedAt" desc
-- AssetJobRepository.streamForOcrJob
select
"assets"."id"
from
"assets"
inner join "asset_job_status" on "asset_job_status"."assetId" = "assets"."id"
where
"asset_job_status"."ocrAt" is null
and "assets"."deletedAt" is null
and "assets"."visibility" != $1
-- AssetJobRepository.streamForMigrationJob
select
"id"

View file

@ -0,0 +1,54 @@
-- NOTE: This file is auto generated by ./sql-generator
-- OcrRepository.getById
select
"asset_ocr".*
from
"asset_ocr"
where
"asset_ocr"."id" = $1
-- OcrRepository.getByAssetId
select
"asset_ocr".*
from
"asset_ocr"
where
"asset_ocr"."assetId" = $1
-- OcrRepository.upsert
with
"deleted_ocr" as (
delete from "asset_ocr"
where
"assetId" = $1
),
"inserted_ocr" as (
insert into
"asset_ocr" (
"assetId",
"x1",
"y1",
"x2",
"y2",
"x3",
"y3",
"x4",
"y4",
"text",
"confidence"
)
values
($2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
),
"inserted_search" as (
insert into
"ocr_search" ("assetId", "text")
values
($13, $14)
on conflict ("assetId") do update
set
"text" = "excluded"."text"
)
select
1 as "dummy"

View file

@ -16,6 +16,7 @@ import {
withExifInner,
withFaces,
withFacesAndPeople,
withFile,
withFiles,
} from 'src/utils/database';
@ -192,6 +193,15 @@ export class AssetJobRepository {
.executeTakeFirst();
}
@GenerateSql({ params: [DummyValue.UUID] })
getForOcr(id: string) {
return this.db
.selectFrom('assets')
.select((eb) => ['assets.visibility', withFile(eb, AssetFileType.PREVIEW).as('previewFile')])
.where('assets.id', '=', id)
.executeTakeFirst();
}
@GenerateSql({ params: [[DummyValue.UUID]] })
getForSyncAssets(ids: string[]) {
return this.db

View file

@ -1,10 +1,11 @@
import { Injectable } from '@nestjs/common';
import { Kysely, QueryCreator, sql } from 'kysely';
import { Kysely, sql } from 'kysely';
import { InjectKysely } from 'nestjs-kysely';
import { DB } from 'src/db';
import { DummyValue, GenerateSql } from 'src/decorators';
export interface OcrInsertData {
assetId: string;
x1: number;
y1: number;
x2: number;
@ -22,57 +23,59 @@ export class OcrRepository {
constructor(@InjectKysely() private db: Kysely<DB>) {}
@GenerateSql({ params: [DummyValue.UUID] })
async getById(id: string) {
return this.db
.selectFrom('asset_ocr')
.selectAll('asset_ocr')
.where('asset_ocr.assetId', '=', id)
.executeTakeFirst();
getById(id: string) {
return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.id', '=', id).executeTakeFirst();
}
async deleteAll(): Promise<void> {
await sql`truncate ${sql.table('asset_ocr')}`.execute(this.db);
await sql`truncate ${sql.table('ocr_search')}`.execute(this.db);
@GenerateSql({ params: [DummyValue.UUID] })
getByAssetId(id: string) {
return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.assetId', '=', id).execute();
}
async upsert(assetId: string, ocrDataList: OcrInsertData[]): Promise<void> {
deleteAll() {
return this.db.transaction().execute(async (trx: Kysely<DB>) => {
await sql`truncate ${sql.table('asset_ocr')}`.execute(trx);
await sql`truncate ${sql.table('ocr_search')}`.execute(trx);
});
}
@GenerateSql({
params: [
DummyValue.UUID,
[
{
assetId: DummyValue.UUID,
x1: DummyValue.NUMBER,
y1: DummyValue.NUMBER,
x2: DummyValue.NUMBER,
y2: DummyValue.NUMBER,
x3: DummyValue.NUMBER,
y3: DummyValue.NUMBER,
x4: DummyValue.NUMBER,
y4: DummyValue.NUMBER,
text: DummyValue.STRING,
confidence: DummyValue.NUMBER,
},
],
],
})
upsert(assetId: string, ocrDataList: OcrInsertData[]) {
if (ocrDataList.length === 0) {
return;
}
const assetOcrData = ocrDataList.map(item => ({
assetId,
...item,
}));
const searchText = ocrDataList.map((item) => item.text.trim()).join(' ');
const searchText = ocrDataList.map(item => item.text.trim()).join('');
await this.db.transaction().execute(async (trx: Kysely<DB>) => {
await trx
.with('deleted_ocr', (db: QueryCreator<DB>) =>
db.deleteFrom('asset_ocr').where('assetId', '=', assetId).returningAll()
)
.insertInto('asset_ocr')
.values(assetOcrData)
.execute();
if (searchText.trim()) {
await trx
.with('deleted_search', (db: QueryCreator<DB>) =>
db.deleteFrom('ocr_search').where('assetId', '=', assetId).returningAll()
)
return this.db
.with('deleted_ocr', (db) => db.deleteFrom('asset_ocr').where('assetId', '=', assetId))
.with('inserted_ocr', (db) => db.insertInto('asset_ocr').values(ocrDataList))
.with('inserted_search', (db) =>
db
.insertInto('ocr_search')
.values({
assetId,
text: searchText,
})
.values({ assetId, text: searchText })
.onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ text: eb.ref('excluded.text') }))),
)
.selectNoFrom(sql`1`.as('dummy'))
.execute();
} else {
await trx
.deleteFrom('ocr_search')
.where('assetId', '=', assetId)
.execute();
}
});
}
}

View file

@ -85,8 +85,7 @@ export interface SearchEmbeddingOptions {
}
export interface SearchOcrOptions {
ocr: string;
userIds: string[];
ocr?: string;
}
export interface SearchPeopleOptions {
@ -119,7 +118,8 @@ type BaseAssetSearchOptions = SearchDateOptions &
SearchUserIdOptions &
SearchPeopleOptions &
SearchTagOptions &
SearchAlbumOptions;
SearchAlbumOptions &
SearchOcrOptions;
export type AssetSearchOptions = BaseAssetSearchOptions & SearchRelationOptions;
@ -132,7 +132,8 @@ export type SmartSearchOptions = SearchDateOptions &
SearchStatusOptions &
SearchUserIdOptions &
SearchPeopleOptions &
SearchTagOptions;
SearchTagOptions &
SearchOcrOptions;
export type OcrSearchOptions = SearchDateOptions & SearchOcrOptions;
@ -307,30 +308,6 @@ export class SearchRepository {
return this.db.selectFrom('smart_search').selectAll().where('assetId', '=', assetId).executeTakeFirst();
}
@GenerateSql({
params: [
{ page: 1, size: 100 },
{
userIds: [DummyValue.UUID],
ocr: DummyValue.STRING,
},
],
})
async searchOcr(pagination: SearchPaginationOptions, options: OcrSearchOptions) {
if (!isValidInteger(pagination.size, { min: 1, max: 1000 })) {
throw new Error(`Invalid value for 'size': ${pagination.size}`);
}
const items = await searchAssetBuilder(this.db, options)
.innerJoin('ocr_search', 'assets.id', 'ocr_search.assetId')
.where('ocr_search.text', 'ilike', `%${options.ocr}%`)
.limit(pagination.size + 1)
.offset((pagination.page - 1) * pagination.size)
.execute();
return paginationHelper(items, pagination.size);
}
@GenerateSql({
params: [
{

View file

@ -1,7 +1,9 @@
import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" integer NOT NULL, "y1" integer NOT NULL, "x2" integer NOT NULL, "y2" integer NOT NULL, "x3" integer NOT NULL, "y3" integer NOT NULL, "x4" integer NOT NULL, "y4" integer NOT NULL, "text" text NOT NULL, "confidence" double precision NOT NULL);`.execute(db);
await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" integer NOT NULL, "y1" integer NOT NULL, "x2" integer NOT NULL, "y2" integer NOT NULL, "x3" integer NOT NULL, "y3" integer NOT NULL, "x4" integer NOT NULL, "y4" integer NOT NULL, "text" text NOT NULL, "confidence" real NOT NULL);`.execute(
db,
);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "PK_5c37b36ceef9ac1f688b6c6bf22" PRIMARY KEY ("id");`.execute(db);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "FK_dc592ec504976f5636e28bb84c6" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db);
await sql`CREATE INDEX "IDX_dc592ec504976f5636e28bb84c" ON "asset_ocr" ("assetId")`.execute(db);

View file

@ -4,6 +4,9 @@ export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE TABLE "ocr_search" ("assetId" uuid NOT NULL, "text" text NOT NULL);`.execute(db);
await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "PK_a8299b7f08ef223f6d32f4482a7" PRIMARY KEY ("assetId");`.execute(db);
await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "FK_a8299b7f08ef223f6d32f4482a7" FOREIGN KEY ("assetId") REFERENCES "assets" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(db);
await sql`CREATE INDEX "idx_ocr_search_text" ON "ocr_search" USING gin (f_unaccent("text") gin_trgm_ops);`.execute(
db,
);
}
export async function down(db: Kysely<any>): Promise<void> {

View file

@ -40,6 +40,6 @@ export class AssetOcrTable {
@Column({ type: 'text' })
text!: string;
@Column({ type: 'double precision' })
@Column({ type: 'real' })
confidence!: number;
}

View file

@ -1,7 +1,12 @@
import { AssetTable } from 'src/schema/tables/asset.table';
import { Column, ForeignKeyColumn, Table } from 'src/sql-tools';
import { Column, ForeignKeyColumn, Index, Table } from 'src/sql-tools';
@Table('ocr_search')
@Index({
name: 'idx_ocr_search_text',
using: 'gin',
expression: 'f_unaccent("text") gin_trgm_ops',
})
export class OcrSearchTable {
@ForeignKeyColumn(() => AssetTable, {
onDelete: 'CASCADE',

View file

@ -1,19 +1,13 @@
import { Injectable } from '@nestjs/common';
import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants';
import { OnJob } from 'src/decorators';
import {
JobName,
JobStatus,
QueueName,
} from 'src/enum';
import { AssetVisibility, JobName, JobStatus, QueueName } from 'src/enum';
import { BaseService } from 'src/services/base.service';
import { JobItem, JobOf } from 'src/types';
import { getAssetFiles } from 'src/utils/asset.util';
import { isOcrEnabled } from 'src/utils/misc';
@Injectable()
export class OcrService extends BaseService {
@OnJob({ name: JobName.QUEUE_OCR, queue: QueueName.OCR })
async handleQueueOcr({ force, nightly }: JobOf<JobName.QUEUE_OCR>): Promise<JobStatus> {
const { machineLearning } = await this.getConfig({ withCache: false });
@ -48,35 +42,24 @@ export class OcrService extends BaseService {
return JobStatus.SKIPPED;
}
const relations = { files: true };
const asset = await this.assetRepository.getById(id, relations);
if (!asset) {
const asset = await this.assetJobRepository.getForOcr(id);
if (!asset || !asset.previewFile) {
return JobStatus.FAILED;
}
if (!asset.files) {
return JobStatus.FAILED;
}
const { previewFile } = getAssetFiles(asset.files);
if (!previewFile) {
return JobStatus.FAILED;
if (asset.visibility === AssetVisibility.HIDDEN) {
return JobStatus.SKIPPED;
}
const ocrResults = await this.machineLearningRepository.ocr(
machineLearning.urls,
previewFile.path,
machineLearning.ocr
asset.previewFile,
machineLearning.ocr,
);
if (ocrResults.length === 0) {
this.logger.warn(`No valid OCR results for document ${id}`);
await this.assetRepository.upsertJobStatus({
assetId: asset.id,
ocrAt: new Date(),
});
return JobStatus.SUCCESS;
}
try {
const ocrDataList = ocrResults.map(result => ({
if (ocrResults.length > 0) {
const ocrDataList = ocrResults.map((result) => ({
assetId: id,
x1: result.x1,
y1: result.y1,
x2: result.x2,
@ -85,22 +68,16 @@ export class OcrService extends BaseService {
y3: result.y3,
x4: result.x4,
y4: result.y4,
text: result.text.trim(),
text: result.text,
confidence: result.confidence,
}));
await this.ocrRepository.upsert(id, ocrDataList);
await this.assetRepository.upsertJobStatus({
assetId: asset.id,
ocrAt: new Date(),
});
}
await this.assetRepository.upsertJobStatus({ assetId: id, ocrAt: new Date() });
this.logger.debug(`Processed ${ocrResults.length} OCR result(s) for ${id}`);
return JobStatus.SUCCESS;
} catch (error) {
this.logger.error(`Failed to insert OCR results for ${id}:`, error);
return JobStatus.FAILED;
}
}
}

View file

@ -7,7 +7,6 @@ import {
LargeAssetSearchDto,
mapPlaces,
MetadataSearchDto,
OcrSearchDto,
PlacesResponseDto,
RandomSearchDto,
SearchPeopleDto,
@ -23,7 +22,7 @@ import { AssetOrder, AssetVisibility, Permission } from 'src/enum';
import { BaseService } from 'src/services/base.service';
import { requireElevatedPermission } from 'src/utils/access';
import { getMyPartnerIds } from 'src/utils/asset.util';
import { isOcrEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import { isSmartSearchEnabled } from 'src/utils/misc';
@Injectable()
export class SearchService extends BaseService {
@ -146,23 +145,6 @@ export class SearchService extends BaseService {
return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null, { auth });
}
async searchOcr(auth: AuthDto, dto: OcrSearchDto): Promise<SearchResponseDto> {
const { machineLearning } = await this.getConfig({ withCache: false });
if (!isOcrEnabled(machineLearning)) {
throw new BadRequestException('OCR is not enabled');
}
const userIds = await this.getUserIdsToSearch(auth);
const page = dto.page ?? 1;
const size = dto.size || 250;
const { items, hasNextPage } = await this.searchRepository.searchOcr(
{ page, size },
{ ...dto, userIds },
);
return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null, { auth });
}
async getAssetsByCity(auth: AuthDto): Promise<AssetResponseDto[]> {
const userIds = await this.getUserIdsToSearch(auth);
const assets = await this.searchRepository.getAssetsByCity(userIds);

View file

@ -322,7 +322,8 @@ export type ColumnType =
| 'uuid'
| 'vector'
| 'enum'
| 'serial';
| 'serial'
| 'real';
export type DatabaseSchema = {
databaseName: string;

View file

@ -200,6 +200,14 @@ export function withFiles(eb: ExpressionBuilder<DB, 'asset'>, type?: AssetFileTy
).as('files');
}
export function withFile(eb: ExpressionBuilder<DB, 'asset'>, type: AssetFileType) {
return eb
.selectFrom('asset_file')
.select(columns.assetFiles)
.whereRef('asset_file.assetId', '=', 'asset.id')
.where('asset_file.type', '=', type);
}
export function withFacesAndPeople(eb: ExpressionBuilder<DB, 'asset'>, withDeletedFace?: boolean) {
return jsonArrayFrom(
eb
@ -380,6 +388,11 @@ export function searchAssetBuilder(kysely: Kysely<DB>, options: AssetSearchBuild
.innerJoin('asset_exif', 'asset.id', 'asset_exif.assetId')
.where(sql`f_unaccent(asset_exif.description)`, 'ilike', sql`'%' || f_unaccent(${options.description}) || '%'`),
)
.$if(!!options.ocr, (qb) =>
qb
.innerJoin('ocr_search', 'assets.id', 'ocr_search.assetId')
.where(() => sql`f_unaccent(ocr_search.text) %>> f_unaccent(${options.ocr!})`),
)
.$if(!!options.type, (qb) => qb.where('asset.type', '=', options.type!))
.$if(options.isFavorite !== undefined, (qb) => qb.where('asset.isFavorite', '=', options.isFavorite!))
.$if(options.isOffline !== undefined, (qb) => qb.where('asset.isOffline', '=', options.isOffline!))