feat(server): near-duplicate detection (#8228)

* duplicate detection job, entity, config

* queueing

* job panel, update api

* use embedding in db instead of fetching

* disable concurrency

* only queue visible assets

* handle multiple duplicateIds

* update concurrent queue check

* add provider

* add web placeholder, server endpoint, migration, various fixes

* update sql

* select embedding by default

* rename variable

* simplify

* remove separate entity, handle re-running with different threshold, set default back to 0.02

* fix tests

* add tests

* add index to entity

* formatting

* update asset mock

* fix `upsertJobStatus` signature

* update sql

* formatting

* default to 0.03

* optimize clustering

* use asset's `duplicateId` if present

* update sql

* update tests

* expose admin setting

* refactor

* formatting

* skip if ml is disabled

* debug trash e2e

* remove from web

* remove from sidebar

* test if ml is disabled

* update sql

* separate duplicate detection from clip in config, disable by default for now

* fix doc

* lower minimum `maxDistance`

* update api

* Add and Use Duplicate Detection Feature Flag (#9364)

* Add Duplicate Detection Flag

* Use Duplicate Detection Flag

* Attempt Fixes for Failing Checks

* lower minimum `maxDistance`

* fix tests

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>

* chore: fixes and additions after rebase

* chore: update api (remove new Role enum)

* fix: left join smart search so getAll works without machine learning

* test: trash e2e go back to checking length of assets is zero

* chore: regen api after rebase

* test: fix tests after rebase

* redundant join

---------

Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com>
Co-authored-by: Zack Pollard <zackpollard@ymail.com>
Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
Mert 2024-05-16 13:08:37 -04:00 committed by GitHub
parent 673e97e71d
commit 64636c0618
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 1254 additions and 61 deletions

View file

@ -111,6 +111,10 @@ export interface SystemConfig {
enabled: boolean;
modelName: string;
};
duplicateDetection: {
enabled: boolean;
maxDistance: number;
};
facialRecognition: {
enabled: boolean;
modelName: string;
@ -249,6 +253,10 @@ export const defaults = Object.freeze<SystemConfig>({
enabled: true,
modelName: 'ViT-B-32__openai',
},
duplicateDetection: {
enabled: false,
maxDistance: 0.03,
},
facialRecognition: {
enabled: true,
modelName: 'buffalo_l',

View file

@ -57,6 +57,11 @@ export class AssetController {
return this.service.getStatistics(auth, dto);
}
@Get('duplicates')
getAssetDuplicates(@Auth() auth: AuthDto): Promise<AssetResponseDto[]> {
return this.service.getDuplicates(auth);
}
@Post('jobs')
@HttpCode(HttpStatus.NO_CONTENT)
@Authenticated()

View file

@ -73,6 +73,9 @@ export class AllJobStatusResponseDto implements Record<QueueName, JobStatusDto>
@ApiProperty({ type: JobStatusDto })
[QueueName.SEARCH]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.DUPLICATE_DETECTION]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.FACE_DETECTION]!: JobStatusDto;

View file

@ -4,10 +4,12 @@ import { IsEnum, IsNotEmpty, IsNumber, IsString, Max, Min } from 'class-validato
import { CLIPMode, ModelType } from 'src/interfaces/machine-learning.interface';
import { Optional, ValidateBoolean } from 'src/validation';
export class ModelConfig {
export class TaskConfig {
@ValidateBoolean()
enabled!: boolean;
}
export class ModelConfig extends TaskConfig {
@IsString()
@IsNotEmpty()
modelName!: string;
@ -25,6 +27,15 @@ export class CLIPConfig extends ModelConfig {
mode?: CLIPMode;
}
export class DuplicateDetectionConfig extends TaskConfig {
@IsNumber()
@Min(0.001)
@Max(0.1)
@Type(() => Number)
@ApiProperty({ type: 'number', format: 'float' })
maxDistance!: number;
}
export class RecognitionConfig extends ModelConfig {
@IsNumber()
@Min(0)

View file

@ -97,6 +97,7 @@ export class ServerConfigDto {
export class ServerFeaturesDto {
smartSearch!: boolean;
duplicateDetection!: boolean;
configFile!: boolean;
facialRecognition!: boolean;
map!: boolean;

View file

@ -30,7 +30,7 @@ import {
TranscodePolicy,
VideoCodec,
} from 'src/config';
import { CLIPConfig, RecognitionConfig } from 'src/dtos/model-config.dto';
import { CLIPConfig, DuplicateDetectionConfig, RecognitionConfig } from 'src/dtos/model-config.dto';
import { ConcurrentQueueName, QueueName } from 'src/interfaces/job.interface';
import { ValidateBoolean, validateCronExpression } from 'src/validation';
@ -262,6 +262,11 @@ class SystemConfigMachineLearningDto {
@IsObject()
clip!: CLIPConfig;
@Type(() => DuplicateDetectionConfig)
@ValidateNested()
@IsObject()
duplicateDetection!: DuplicateDetectionConfig;
@Type(() => RecognitionConfig)
@ValidateNested()
@IsObject()

View file

@ -15,4 +15,7 @@ export class AssetJobStatusEntity {
@Column({ type: 'timestamptz', nullable: true })
metadataExtractedAt!: Date | null;
@Column({ type: 'timestamptz', nullable: true })
duplicatesDetectedAt!: Date | null;
}

View file

@ -165,6 +165,10 @@ export class AssetEntity {
@OneToOne(() => AssetJobStatusEntity, (jobStatus) => jobStatus.asset, { nullable: true })
jobStatus?: AssetJobStatusEntity;
@Index('IDX_assets_duplicateId')
@Column({ type: 'uuid', nullable: true })
duplicateId!: string | null;
}
export enum AssetType {

View file

@ -11,10 +11,6 @@ export class SmartSearchEntity {
assetId!: string;
@Index('clip_index', { synchronize: false })
@Column({
type: 'float4',
array: true,
select: false,
})
@Column({ type: 'float4', array: true, transformer: { from: (v) => JSON.parse(v), to: (v) => v } })
embedding!: number[];
}

View file

@ -40,6 +40,7 @@ export enum WithoutProperty {
ENCODED_VIDEO = 'encoded-video',
EXIF = 'exif',
SMART_SEARCH = 'smart-search',
DUPLICATE = 'duplicate',
OBJECT_TAGS = 'object-tags',
FACES = 'faces',
PERSON = 'person',
@ -60,6 +61,7 @@ export interface AssetBuilderOptions {
isArchived?: boolean;
isFavorite?: boolean;
isTrashed?: boolean;
isDuplicate?: boolean;
albumId?: string;
personId?: string;
userIds?: string[];
@ -143,6 +145,12 @@ export interface AssetDeltaSyncOptions {
limit: number;
}
export interface AssetUpdateDuplicateOptions {
targetDuplicateId: string | null;
assetIds: string[];
duplicateIds: string[];
}
export type AssetPathEntity = Pick<AssetEntity, 'id' | 'originalPath' | 'isOffline'>;
export const IAssetRepository = 'IAssetRepository';
@ -176,6 +184,7 @@ export interface IAssetRepository {
getAll(pagination: PaginationOptions, options?: AssetSearchOptions): Paginated<AssetEntity>;
getAllByDeviceId(userId: string, deviceId: string): Promise<string[]>;
updateAll(ids: string[], options: Partial<AssetUpdateAllOptions>): Promise<void>;
updateDuplicates(options: AssetUpdateDuplicateOptions): Promise<void>;
update(asset: AssetUpdateOptions): Promise<void>;
remove(asset: AssetEntity): Promise<void>;
softDeleteAll(ids: string[]): Promise<void>;
@ -186,9 +195,10 @@ export interface IAssetRepository {
getTimeBuckets(options: TimeBucketOptions): Promise<TimeBucketItem[]>;
getTimeBucket(timeBucket: string, options: TimeBucketOptions): Promise<AssetEntity[]>;
upsertExif(exif: Partial<ExifEntity>): Promise<void>;
upsertJobStatus(jobStatus: Partial<AssetJobStatusEntity>): Promise<void>;
upsertJobStatus(...jobStatus: Partial<AssetJobStatusEntity>[]): Promise<void>;
getAssetIdByCity(userId: string, options: AssetExploreFieldOptions): Promise<SearchExploreItem<string>>;
getAssetIdByTag(userId: string, options: AssetExploreFieldOptions): Promise<SearchExploreItem<string>>;
getDuplicates(options: AssetBuilderOptions): Promise<AssetEntity[]>;
getAllForUserFullSync(options: AssetFullSyncOptions): Promise<AssetEntity[]>;
getChangedDeltaSync(options: AssetDeltaSyncOptions): Promise<AssetEntity[]>;
}

View file

@ -5,6 +5,7 @@ export enum QueueName {
FACE_DETECTION = 'faceDetection',
FACIAL_RECOGNITION = 'facialRecognition',
SMART_SEARCH = 'smartSearch',
DUPLICATE_DETECTION = 'duplicateDetection',
BACKGROUND_TASK = 'backgroundTask',
STORAGE_TEMPLATE_MIGRATION = 'storageTemplateMigration',
MIGRATION = 'migration',
@ -16,7 +17,7 @@ export enum QueueName {
export type ConcurrentQueueName = Exclude<
QueueName,
QueueName.STORAGE_TEMPLATE_MIGRATION | QueueName.FACIAL_RECOGNITION
QueueName.STORAGE_TEMPLATE_MIGRATION | QueueName.FACIAL_RECOGNITION | QueueName.DUPLICATE_DETECTION
>;
export enum JobCommand {
@ -86,6 +87,10 @@ export enum JobName {
QUEUE_SMART_SEARCH = 'queue-smart-search',
SMART_SEARCH = 'smart-search',
// duplicate detection
QUEUE_DUPLICATE_DETECTION = 'queue-duplicate-detection',
DUPLICATE_DETECTION = 'duplicate-detection',
// XMP sidecars
QUEUE_SIDECAR = 'queue-sidecar',
SIDECAR_DISCOVERY = 'sidecar-discovery',
@ -212,6 +217,10 @@ export type JobItem =
| { name: JobName.QUEUE_SMART_SEARCH; data: IBaseJob }
| { name: JobName.SMART_SEARCH; data: IEntityJob }
// Duplicate Detection
| { name: JobName.QUEUE_DUPLICATE_DETECTION; data: IBaseJob }
| { name: JobName.DUPLICATE_DETECTION; data: IEntityJob }
// Filesystem
| { name: JobName.DELETE_FILES; data: IDeleteFilesJob }

View file

@ -152,15 +152,29 @@ export interface FaceEmbeddingSearch extends SearchEmbeddingOptions {
maxDistance?: number;
}
export interface AssetDuplicateSearch {
assetId: string;
embedding: Embedding;
userIds: string[];
maxDistance?: number;
}
export interface FaceSearchResult {
distance: number;
face: AssetFaceEntity;
}
export interface AssetDuplicateResult {
assetId: string;
duplicateId: string | null;
distance: number;
}
export interface ISearchRepository {
init(modelName: string): Promise<void>;
searchMetadata(pagination: SearchPaginationOptions, options: AssetSearchOptions): Paginated<AssetEntity>;
searchSmart(pagination: SearchPaginationOptions, options: SmartSearchOptions): Paginated<AssetEntity>;
searchDuplicates(options: AssetDuplicateSearch): Promise<AssetDuplicateResult[]>;
searchFaces(search: FaceEmbeddingSearch): Promise<FaceSearchResult[]>;
upsert(assetId: string, embedding: number[]): Promise<void>;
searchPlaces(placeName: string): Promise<GeodataPlacesEntity[]>;

View file

@ -0,0 +1,14 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class CreateAssetDuplicateColumns1711989989911 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`ALTER TABLE assets ADD COLUMN "duplicateId" uuid`);
await queryRunner.query(`ALTER TABLE asset_job_status ADD COLUMN "duplicatesDetectedAt" timestamptz`);
await queryRunner.query(`CREATE INDEX "IDX_assets_duplicateId" ON assets ("duplicateId")`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`ALTER TABLE assets DROP COLUMN "duplicateId"`);
await queryRunner.query(`ALTER TABLE asset_job_status DROP COLUMN "duplicatesDetectedAt"`);
}
}

View file

@ -30,6 +30,7 @@ SELECT
"entity"."originalFileName" AS "entity_originalFileName",
"entity"."sidecarPath" AS "entity_sidecarPath",
"entity"."stackId" AS "entity_stackId",
"entity"."duplicateId" AS "entity_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
@ -111,7 +112,8 @@ SELECT
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
FROM
"assets" "AssetEntity"
WHERE
@ -147,6 +149,7 @@ SELECT
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId",
"AssetEntity__AssetEntity_exifInfo"."assetId" AS "AssetEntity__AssetEntity_exifInfo_assetId",
"AssetEntity__AssetEntity_exifInfo"."description" AS "AssetEntity__AssetEntity_exifInfo_description",
"AssetEntity__AssetEntity_exifInfo"."exifImageWidth" AS "AssetEntity__AssetEntity_exifInfo_exifImageWidth",
@ -230,7 +233,8 @@ SELECT
"bd93d5747511a4dad4923546c51365bf1a803774"."livePhotoVideoId" AS "bd93d5747511a4dad4923546c51365bf1a803774_livePhotoVideoId",
"bd93d5747511a4dad4923546c51365bf1a803774"."originalFileName" AS "bd93d5747511a4dad4923546c51365bf1a803774_originalFileName",
"bd93d5747511a4dad4923546c51365bf1a803774"."sidecarPath" AS "bd93d5747511a4dad4923546c51365bf1a803774_sidecarPath",
"bd93d5747511a4dad4923546c51365bf1a803774"."stackId" AS "bd93d5747511a4dad4923546c51365bf1a803774_stackId"
"bd93d5747511a4dad4923546c51365bf1a803774"."stackId" AS "bd93d5747511a4dad4923546c51365bf1a803774_stackId",
"bd93d5747511a4dad4923546c51365bf1a803774"."duplicateId" AS "bd93d5747511a4dad4923546c51365bf1a803774_duplicateId"
FROM
"assets" "AssetEntity"
LEFT JOIN "exif" "AssetEntity__AssetEntity_exifInfo" ON "AssetEntity__AssetEntity_exifInfo"."assetId" = "AssetEntity"."id"
@ -311,7 +315,8 @@ FROM
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
FROM
"assets" "AssetEntity"
LEFT JOIN "libraries" "AssetEntity__AssetEntity_library" ON "AssetEntity__AssetEntity_library"."id" = "AssetEntity"."libraryId"
@ -407,7 +412,8 @@ SELECT
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
FROM
"assets" "AssetEntity"
WHERE
@ -423,6 +429,15 @@ SET
WHERE
"id" IN ($2)
-- AssetRepository.updateDuplicates
UPDATE "assets"
SET
"duplicateId" = $1,
"updatedAt" = CURRENT_TIMESTAMP
WHERE
"duplicateId" IN ($2)
OR "id" IN ($3)
-- AssetRepository.getByChecksum
SELECT
"AssetEntity"."id" AS "AssetEntity_id",
@ -452,7 +467,8 @@ SELECT
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
FROM
"assets" "AssetEntity"
WHERE
@ -519,7 +535,8 @@ SELECT
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
FROM
"assets" "AssetEntity"
WHERE
@ -575,6 +592,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
@ -632,7 +650,8 @@ SELECT
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
@ -713,6 +732,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
@ -770,7 +790,8 @@ SELECT
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
@ -797,6 +818,112 @@ ORDER BY
)::timestamptz DESC,
"asset"."fileCreatedAt" DESC
-- AssetRepository.getDuplicates
SELECT
"asset"."id" AS "asset_id",
"asset"."deviceAssetId" AS "asset_deviceAssetId",
"asset"."ownerId" AS "asset_ownerId",
"asset"."libraryId" AS "asset_libraryId",
"asset"."deviceId" AS "asset_deviceId",
"asset"."type" AS "asset_type",
"asset"."originalPath" AS "asset_originalPath",
"asset"."previewPath" AS "asset_previewPath",
"asset"."thumbnailPath" AS "asset_thumbnailPath",
"asset"."thumbhash" AS "asset_thumbhash",
"asset"."encodedVideoPath" AS "asset_encodedVideoPath",
"asset"."createdAt" AS "asset_createdAt",
"asset"."updatedAt" AS "asset_updatedAt",
"asset"."deletedAt" AS "asset_deletedAt",
"asset"."fileCreatedAt" AS "asset_fileCreatedAt",
"asset"."localDateTime" AS "asset_localDateTime",
"asset"."fileModifiedAt" AS "asset_fileModifiedAt",
"asset"."isFavorite" AS "asset_isFavorite",
"asset"."isArchived" AS "asset_isArchived",
"asset"."isExternal" AS "asset_isExternal",
"asset"."isOffline" AS "asset_isOffline",
"asset"."checksum" AS "asset_checksum",
"asset"."duration" AS "asset_duration",
"asset"."isVisible" AS "asset_isVisible",
"asset"."livePhotoVideoId" AS "asset_livePhotoVideoId",
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
"exifInfo"."exifImageHeight" AS "exifInfo_exifImageHeight",
"exifInfo"."fileSizeInByte" AS "exifInfo_fileSizeInByte",
"exifInfo"."orientation" AS "exifInfo_orientation",
"exifInfo"."dateTimeOriginal" AS "exifInfo_dateTimeOriginal",
"exifInfo"."modifyDate" AS "exifInfo_modifyDate",
"exifInfo"."timeZone" AS "exifInfo_timeZone",
"exifInfo"."latitude" AS "exifInfo_latitude",
"exifInfo"."longitude" AS "exifInfo_longitude",
"exifInfo"."projectionType" AS "exifInfo_projectionType",
"exifInfo"."city" AS "exifInfo_city",
"exifInfo"."livePhotoCID" AS "exifInfo_livePhotoCID",
"exifInfo"."autoStackId" AS "exifInfo_autoStackId",
"exifInfo"."state" AS "exifInfo_state",
"exifInfo"."country" AS "exifInfo_country",
"exifInfo"."make" AS "exifInfo_make",
"exifInfo"."model" AS "exifInfo_model",
"exifInfo"."lensModel" AS "exifInfo_lensModel",
"exifInfo"."fNumber" AS "exifInfo_fNumber",
"exifInfo"."focalLength" AS "exifInfo_focalLength",
"exifInfo"."iso" AS "exifInfo_iso",
"exifInfo"."exposureTime" AS "exifInfo_exposureTime",
"exifInfo"."profileDescription" AS "exifInfo_profileDescription",
"exifInfo"."colorspace" AS "exifInfo_colorspace",
"exifInfo"."bitsPerSample" AS "exifInfo_bitsPerSample",
"exifInfo"."fps" AS "exifInfo_fps",
"stack"."id" AS "stack_id",
"stack"."primaryAssetId" AS "stack_primaryAssetId",
"stackedAssets"."id" AS "stackedAssets_id",
"stackedAssets"."deviceAssetId" AS "stackedAssets_deviceAssetId",
"stackedAssets"."ownerId" AS "stackedAssets_ownerId",
"stackedAssets"."libraryId" AS "stackedAssets_libraryId",
"stackedAssets"."deviceId" AS "stackedAssets_deviceId",
"stackedAssets"."type" AS "stackedAssets_type",
"stackedAssets"."originalPath" AS "stackedAssets_originalPath",
"stackedAssets"."previewPath" AS "stackedAssets_previewPath",
"stackedAssets"."thumbnailPath" AS "stackedAssets_thumbnailPath",
"stackedAssets"."thumbhash" AS "stackedAssets_thumbhash",
"stackedAssets"."encodedVideoPath" AS "stackedAssets_encodedVideoPath",
"stackedAssets"."createdAt" AS "stackedAssets_createdAt",
"stackedAssets"."updatedAt" AS "stackedAssets_updatedAt",
"stackedAssets"."deletedAt" AS "stackedAssets_deletedAt",
"stackedAssets"."fileCreatedAt" AS "stackedAssets_fileCreatedAt",
"stackedAssets"."localDateTime" AS "stackedAssets_localDateTime",
"stackedAssets"."fileModifiedAt" AS "stackedAssets_fileModifiedAt",
"stackedAssets"."isFavorite" AS "stackedAssets_isFavorite",
"stackedAssets"."isArchived" AS "stackedAssets_isArchived",
"stackedAssets"."isExternal" AS "stackedAssets_isExternal",
"stackedAssets"."isOffline" AS "stackedAssets_isOffline",
"stackedAssets"."checksum" AS "stackedAssets_checksum",
"stackedAssets"."duration" AS "stackedAssets_duration",
"stackedAssets"."isVisible" AS "stackedAssets_isVisible",
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
LEFT JOIN "asset_stack" "stack" ON "stack"."id" = "asset"."stackId"
LEFT JOIN "assets" "stackedAssets" ON "stackedAssets"."stackId" = "stack"."id"
AND ("stackedAssets"."deletedAt" IS NULL)
WHERE
(
"asset"."isVisible" = true
AND "asset"."ownerId" IN ($1, $2)
AND "asset"."duplicateId" IS NOT NULL
)
AND ("asset"."deletedAt" IS NULL)
ORDER BY
"asset"."duplicateId" ASC
-- AssetRepository.getAssetIdByCity
WITH
"cities" AS (
@ -887,6 +1014,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
@ -944,7 +1072,8 @@ SELECT
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
@ -992,6 +1121,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exifInfo"."assetId" AS "exifInfo_assetId",
"exifInfo"."description" AS "exifInfo_description",
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
@ -1049,7 +1179,8 @@ SELECT
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"

View file

@ -174,7 +174,8 @@ FROM
"AssetFaceEntity__AssetFaceEntity_asset"."livePhotoVideoId" AS "AssetFaceEntity__AssetFaceEntity_asset_livePhotoVideoId",
"AssetFaceEntity__AssetFaceEntity_asset"."originalFileName" AS "AssetFaceEntity__AssetFaceEntity_asset_originalFileName",
"AssetFaceEntity__AssetFaceEntity_asset"."sidecarPath" AS "AssetFaceEntity__AssetFaceEntity_asset_sidecarPath",
"AssetFaceEntity__AssetFaceEntity_asset"."stackId" AS "AssetFaceEntity__AssetFaceEntity_asset_stackId"
"AssetFaceEntity__AssetFaceEntity_asset"."stackId" AS "AssetFaceEntity__AssetFaceEntity_asset_stackId",
"AssetFaceEntity__AssetFaceEntity_asset"."duplicateId" AS "AssetFaceEntity__AssetFaceEntity_asset_duplicateId"
FROM
"asset_faces" "AssetFaceEntity"
LEFT JOIN "person" "AssetFaceEntity__AssetFaceEntity_person" ON "AssetFaceEntity__AssetFaceEntity_person"."id" = "AssetFaceEntity"."personId"
@ -272,6 +273,7 @@ FROM
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId",
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId",
"AssetEntity__AssetEntity_faces"."id" AS "AssetEntity__AssetEntity_faces_id",
"AssetEntity__AssetEntity_faces"."assetId" AS "AssetEntity__AssetEntity_faces_assetId",
"AssetEntity__AssetEntity_faces"."personId" AS "AssetEntity__AssetEntity_faces_personId",
@ -400,7 +402,8 @@ SELECT
"AssetFaceEntity__AssetFaceEntity_asset"."livePhotoVideoId" AS "AssetFaceEntity__AssetFaceEntity_asset_livePhotoVideoId",
"AssetFaceEntity__AssetFaceEntity_asset"."originalFileName" AS "AssetFaceEntity__AssetFaceEntity_asset_originalFileName",
"AssetFaceEntity__AssetFaceEntity_asset"."sidecarPath" AS "AssetFaceEntity__AssetFaceEntity_asset_sidecarPath",
"AssetFaceEntity__AssetFaceEntity_asset"."stackId" AS "AssetFaceEntity__AssetFaceEntity_asset_stackId"
"AssetFaceEntity__AssetFaceEntity_asset"."stackId" AS "AssetFaceEntity__AssetFaceEntity_asset_stackId",
"AssetFaceEntity__AssetFaceEntity_asset"."duplicateId" AS "AssetFaceEntity__AssetFaceEntity_asset_duplicateId"
FROM
"asset_faces" "AssetFaceEntity"
LEFT JOIN "assets" "AssetFaceEntity__AssetFaceEntity_asset" ON "AssetFaceEntity__AssetFaceEntity_asset"."id" = "AssetFaceEntity"."assetId"

View file

@ -35,6 +35,7 @@ FROM
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"stack"."id" AS "stack_id",
"stack"."primaryAssetId" AS "stack_primaryAssetId",
"stackedAssets"."id" AS "stackedAssets_id",
@ -64,7 +65,8 @@ FROM
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
@ -129,6 +131,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"stack"."id" AS "stack_id",
"stack"."primaryAssetId" AS "stack_primaryAssetId",
"stackedAssets"."id" AS "stackedAssets_id",
@ -158,7 +161,8 @@ SELECT
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
"stackedAssets"."stackId" AS "stackedAssets_stackId"
"stackedAssets"."stackId" AS "stackedAssets_stackId",
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
FROM
"assets" "asset"
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
@ -185,6 +189,35 @@ LIMIT
101
COMMIT
-- SearchRepository.searchDuplicates
WITH
"cte" AS (
SELECT
"asset"."duplicateId" AS "duplicateId",
"search"."assetId" AS "assetId",
"search"."embedding" <= > $1 AS "distance"
FROM
"assets" "asset"
INNER JOIN "smart_search" "search" ON "search"."assetId" = "asset"."id"
WHERE
(
"asset"."ownerId" IN ($2)
AND "asset"."id" != $3
AND "asset"."isVisible" = $4
)
AND ("asset"."deletedAt" IS NULL)
ORDER BY
"search"."embedding" <= > $1 ASC
LIMIT
64
)
SELECT
res.*
FROM
"cte" "res"
WHERE
res.distance <= $5
-- SearchRepository.searchFaces
START TRANSACTION
SET
@ -337,6 +370,7 @@ SELECT
"asset"."originalFileName" AS "asset_originalFileName",
"asset"."sidecarPath" AS "asset_sidecarPath",
"asset"."stackId" AS "asset_stackId",
"asset"."duplicateId" AS "asset_duplicateId",
"exif"."assetId" AS "exif_assetId",
"exif"."description" AS "exif_description",
"exif"."exifImageWidth" AS "exif_exifImageWidth",

View file

@ -49,6 +49,7 @@ FROM
"SharedLinkEntity__SharedLinkEntity_assets"."originalFileName" AS "SharedLinkEntity__SharedLinkEntity_assets_originalFileName",
"SharedLinkEntity__SharedLinkEntity_assets"."sidecarPath" AS "SharedLinkEntity__SharedLinkEntity_assets_sidecarPath",
"SharedLinkEntity__SharedLinkEntity_assets"."stackId" AS "SharedLinkEntity__SharedLinkEntity_assets_stackId",
"SharedLinkEntity__SharedLinkEntity_assets"."duplicateId" AS "SharedLinkEntity__SharedLinkEntity_assets_duplicateId",
"9b1d35b344d838023994a3233afd6ffe098be6d8"."assetId" AS "9b1d35b344d838023994a3233afd6ffe098be6d8_assetId",
"9b1d35b344d838023994a3233afd6ffe098be6d8"."description" AS "9b1d35b344d838023994a3233afd6ffe098be6d8_description",
"9b1d35b344d838023994a3233afd6ffe098be6d8"."exifImageWidth" AS "9b1d35b344d838023994a3233afd6ffe098be6d8_exifImageWidth",
@ -115,6 +116,7 @@ FROM
"4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6"."originalFileName" AS "4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6_originalFileName",
"4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6"."sidecarPath" AS "4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6_sidecarPath",
"4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6"."stackId" AS "4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6_stackId",
"4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6"."duplicateId" AS "4a35f463ae8c5544ede95c4b6d9ce8c686b6bfe6_duplicateId",
"d9f2f4dd8920bad1d6907cdb1d699732daff3c2f"."assetId" AS "d9f2f4dd8920bad1d6907cdb1d699732daff3c2f_assetId",
"d9f2f4dd8920bad1d6907cdb1d699732daff3c2f"."description" AS "d9f2f4dd8920bad1d6907cdb1d699732daff3c2f_description",
"d9f2f4dd8920bad1d6907cdb1d699732daff3c2f"."exifImageWidth" AS "d9f2f4dd8920bad1d6907cdb1d699732daff3c2f_exifImageWidth",
@ -237,6 +239,7 @@ SELECT
"SharedLinkEntity__SharedLinkEntity_assets"."originalFileName" AS "SharedLinkEntity__SharedLinkEntity_assets_originalFileName",
"SharedLinkEntity__SharedLinkEntity_assets"."sidecarPath" AS "SharedLinkEntity__SharedLinkEntity_assets_sidecarPath",
"SharedLinkEntity__SharedLinkEntity_assets"."stackId" AS "SharedLinkEntity__SharedLinkEntity_assets_stackId",
"SharedLinkEntity__SharedLinkEntity_assets"."duplicateId" AS "SharedLinkEntity__SharedLinkEntity_assets_duplicateId",
"SharedLinkEntity__SharedLinkEntity_album"."id" AS "SharedLinkEntity__SharedLinkEntity_album_id",
"SharedLinkEntity__SharedLinkEntity_album"."ownerId" AS "SharedLinkEntity__SharedLinkEntity_album_ownerId",
"SharedLinkEntity__SharedLinkEntity_album"."albumName" AS "SharedLinkEntity__SharedLinkEntity_album_albumName",

View file

@ -18,6 +18,7 @@ import {
AssetStats,
AssetStatsOptions,
AssetUpdateAllOptions,
AssetUpdateDuplicateOptions,
AssetUpdateOptions,
IAssetRepository,
LivePhotoSearchOptions,
@ -73,7 +74,7 @@ export class AssetRepository implements IAssetRepository {
await this.exifRepository.upsert(exif, { conflictPaths: ['assetId'] });
}
async upsertJobStatus(jobStatus: Partial<AssetJobStatusEntity>): Promise<void> {
async upsertJobStatus(...jobStatus: Partial<AssetJobStatusEntity>[]): Promise<void> {
await this.jobStatusRepository.upsert(jobStatus, { conflictPaths: ['assetId'] });
}
@ -257,6 +258,21 @@ export class AssetRepository implements IAssetRepository {
await this.repository.update({ id: In(ids) }, options);
}
@GenerateSql({
params: [{ targetDuplicateId: DummyValue.UUID, duplicateIds: [DummyValue.UUID], assetIds: [DummyValue.UUID] }],
})
async updateDuplicates(options: AssetUpdateDuplicateOptions): Promise<void> {
await this.repository
.createQueryBuilder()
.update()
.set({ duplicateId: options.targetDuplicateId })
.where({
duplicateId: In(options.duplicateIds),
})
.orWhere({ id: In(options.assetIds) })
.execute();
}
@Chunked()
async softDeleteAll(ids: string[]): Promise<void> {
await this.repository.softDelete({ id: In(ids) });
@ -375,6 +391,18 @@ export class AssetRepository implements IAssetRepository {
break;
}
case WithoutProperty.DUPLICATE: {
where = {
previewPath: Not(IsNull()),
isVisible: true,
smartSearch: true,
jobStatus: {
duplicatesDetectedAt: IsNull(),
},
};
break;
}
case WithoutProperty.OBJECT_TAGS: {
relations = {
smartInfo: true,
@ -614,6 +642,13 @@ export class AssetRepository implements IAssetRepository {
);
}
@GenerateSql({ params: [{ userIds: [DummyValue.UUID, DummyValue.UUID] }] })
getDuplicates(options: AssetBuilderOptions): Promise<AssetEntity[]> {
return this.getBuilder({ ...options, isDuplicate: true })
.orderBy('asset.duplicateId')
.getMany();
}
@GenerateSql({ params: [DummyValue.UUID, { minAssetsPerField: 5, maxFields: 12 }] })
async getAssetIdByCity(
ownerId: string,
@ -673,16 +708,14 @@ export class AssetRepository implements IAssetRepository {
}
private getBuilder(options: AssetBuilderOptions) {
const { isArchived, isFavorite, isTrashed, albumId, personId, userIds, withStacked, exifInfo, assetType } = options;
const builder = this.repository.createQueryBuilder('asset').where('asset.isVisible = true');
if (assetType !== undefined) {
builder.andWhere('asset.type = :assetType', { assetType });
if (options.assetType !== undefined) {
builder.andWhere('asset.type = :assetType', { assetType: options.assetType });
}
let stackJoined = false;
if (exifInfo !== false) {
if (options.exifInfo !== false) {
stackJoined = true;
builder
.leftJoinAndSelect('asset.exifInfo', 'exifInfo')
@ -690,34 +723,38 @@ export class AssetRepository implements IAssetRepository {
.leftJoinAndSelect('stack.assets', 'stackedAssets');
}
if (albumId) {
builder.leftJoin('asset.albums', 'album').andWhere('album.id = :albumId', { albumId });
if (options.albumId) {
builder.leftJoin('asset.albums', 'album').andWhere('album.id = :albumId', { albumId: options.albumId });
}
if (userIds) {
builder.andWhere('asset.ownerId IN (:...userIds )', { userIds });
if (options.userIds) {
builder.andWhere('asset.ownerId IN (:...userIds )', { userIds: options.userIds });
}
if (isArchived !== undefined) {
builder.andWhere('asset.isArchived = :isArchived', { isArchived });
if (options.isArchived !== undefined) {
builder.andWhere('asset.isArchived = :isArchived', { isArchived: options.isArchived });
}
if (isFavorite !== undefined) {
builder.andWhere('asset.isFavorite = :isFavorite', { isFavorite });
if (options.isFavorite !== undefined) {
builder.andWhere('asset.isFavorite = :isFavorite', { isFavorite: options.isFavorite });
}
if (isTrashed !== undefined) {
builder.andWhere(`asset.deletedAt ${isTrashed ? 'IS NOT NULL' : 'IS NULL'}`).withDeleted();
if (options.isTrashed !== undefined) {
builder.andWhere(`asset.deletedAt ${options.isTrashed ? 'IS NOT NULL' : 'IS NULL'}`).withDeleted();
}
if (personId !== undefined) {
if (options.isDuplicate !== undefined) {
builder.andWhere(`asset.duplicateId ${options.isDuplicate ? 'IS NOT NULL' : 'IS NULL'}`);
}
if (options.personId !== undefined) {
builder
.innerJoin('asset.faces', 'faces')
.innerJoin('faces.person', 'person')
.andWhere('person.id = :personId', { personId });
.andWhere('person.id = :personId', { personId: options.personId });
}
if (withStacked) {
if (options.withStacked) {
if (!stackJoined) {
builder.leftJoinAndSelect('asset.stack', 'stack').leftJoinAndSelect('stack.assets', 'stackedAssets');
}

View file

@ -65,6 +65,10 @@ export const JOBS_TO_QUEUE: Record<JobName, QueueName> = {
[JobName.QUEUE_SMART_SEARCH]: QueueName.SMART_SEARCH,
[JobName.SMART_SEARCH]: QueueName.SMART_SEARCH,
// duplicate detection
[JobName.QUEUE_DUPLICATE_DETECTION]: QueueName.DUPLICATE_DETECTION,
[JobName.DUPLICATE_DETECTION]: QueueName.DUPLICATE_DETECTION,
// XMP sidecars
[JobName.QUEUE_SIDECAR]: QueueName.SIDECAR,
[JobName.SIDECAR_DISCOVERY]: QueueName.SIDECAR,

View file

@ -10,6 +10,8 @@ import { SmartSearchEntity } from 'src/entities/smart-search.entity';
import { DatabaseExtension } from 'src/interfaces/database.interface';
import { ILoggerRepository } from 'src/interfaces/logger.interface';
import {
AssetDuplicateResult,
AssetDuplicateSearch,
AssetSearchOptions,
FaceEmbeddingSearch,
FaceSearchResult,
@ -145,6 +147,44 @@ export class SearchRepository implements ISearchRepository {
return results;
}
@GenerateSql({
params: [
{
embedding: Array.from({ length: 512 }, Math.random),
maxDistance: 0.6,
userIds: [DummyValue.UUID],
},
],
})
searchDuplicates({
assetId,
embedding,
maxDistance,
userIds,
}: AssetDuplicateSearch): Promise<AssetDuplicateResult[]> {
const cte = this.assetRepository.createQueryBuilder('asset');
cte
.select('search.assetId', 'assetId')
.addSelect('asset.duplicateId', 'duplicateId')
.addSelect(`search.embedding <=> :embedding`, 'distance')
.innerJoin('asset.smartSearch', 'search')
.where('asset.ownerId IN (:...userIds )')
.andWhere('asset.id != :assetId')
.andWhere('asset.isVisible = :isVisible')
.orderBy('search.embedding <=> :embedding')
.limit(64)
.setParameters({ assetId, embedding: asVector(embedding), isVisible: true, userIds });
const builder = this.assetRepository.manager
.createQueryBuilder()
.addCommonTableExpression(cte, 'cte')
.from('cte', 'res')
.select('res.*')
.where('res.distance <= :maxDistance', { maxDistance });
return builder.getRawMany() as any as Promise<AssetDuplicateResult[]>;
}
@GenerateSql({
params: [
{

View file

@ -286,6 +286,11 @@ export class AssetService {
return data;
}
async getDuplicates(auth: AuthDto): Promise<AssetResponseDto[]> {
const res = await this.assetRepository.getDuplicates({ userIds: [auth.user.id] });
return res.map((a) => mapAsset(a, { auth }));
}
async update(auth: AuthDto, id: string, dto: UpdateAssetDto): Promise<AssetResponseDto> {
await this.access.requirePermission(auth, Permission.ASSET_UPDATE, id);

View file

@ -109,6 +109,7 @@ describe(JobService.name, () => {
await expect(sut.getAllJobsStatus()).resolves.toEqual({
[QueueName.BACKGROUND_TASK]: expectedJobStatus,
[QueueName.DUPLICATE_DETECTION]: expectedJobStatus,
[QueueName.SMART_SEARCH]: expectedJobStatus,
[QueueName.METADATA_EXTRACTION]: expectedJobStatus,
[QueueName.SEARCH]: expectedJobStatus,

View file

@ -115,6 +115,10 @@ export class JobService {
return this.jobRepository.queue({ name: JobName.QUEUE_SMART_SEARCH, data: { force } });
}
case QueueName.DUPLICATE_DETECTION: {
return this.jobRepository.queue({ name: JobName.QUEUE_DUPLICATE_DETECTION, data: { force } });
}
case QueueName.METADATA_EXTRACTION: {
return this.jobRepository.queue({ name: JobName.QUEUE_METADATA_EXTRACTION, data: { force } });
}
@ -191,7 +195,11 @@ export class JobService {
}
private isConcurrentQueue(name: QueueName): name is ConcurrentQueueName {
return ![QueueName.FACIAL_RECOGNITION, QueueName.STORAGE_TEMPLATE_MIGRATION].includes(name);
return ![
QueueName.FACIAL_RECOGNITION,
QueueName.STORAGE_TEMPLATE_MIGRATION,
QueueName.DUPLICATE_DETECTION,
].includes(name);
}
async handleNightlyJobs() {
@ -294,6 +302,13 @@ export class JobService {
break;
}
case JobName.SMART_SEARCH: {
if (item.data.source === 'upload') {
await this.jobRepository.queue({ name: JobName.DUPLICATE_DETECTION, data: item.data });
}
break;
}
case JobName.USER_DELETION: {
this.eventRepository.clientBroadcast(ClientEvent.USER_DELETE, item.data.id);
break;

View file

@ -9,6 +9,7 @@ import { MediaService } from 'src/services/media.service';
import { MetadataService } from 'src/services/metadata.service';
import { NotificationService } from 'src/services/notification.service';
import { PersonService } from 'src/services/person.service';
import { SearchService } from 'src/services/search.service';
import { SessionService } from 'src/services/session.service';
import { SmartInfoService } from 'src/services/smart-info.service';
import { StorageTemplateService } from 'src/services/storage-template.service';
@ -35,6 +36,7 @@ export class MicroservicesService {
private storageTemplateService: StorageTemplateService,
private storageService: StorageService,
private userService: UserService,
private searchService: SearchService,
) {}
async init() {
@ -53,6 +55,8 @@ export class MicroservicesService {
[JobName.USER_SYNC_USAGE]: () => this.userService.handleUserSyncUsage(),
[JobName.QUEUE_SMART_SEARCH]: (data) => this.smartInfoService.handleQueueEncodeClip(data),
[JobName.SMART_SEARCH]: (data) => this.smartInfoService.handleEncodeClip(data),
[JobName.QUEUE_DUPLICATE_DETECTION]: (data) => this.searchService.handleQueueSearchDuplicates(data),
[JobName.DUPLICATE_DETECTION]: (data) => this.searchService.handleSearchDuplicates(data),
[JobName.STORAGE_TEMPLATE_MIGRATION]: () => this.storageTemplateService.handleMigration(),
[JobName.STORAGE_TEMPLATE_MIGRATION_SINGLE]: (data) => this.storageTemplateService.handleMigrationSingle(data),
[JobName.QUEUE_MIGRATION]: () => this.mediaService.handleQueueMigration(),

View file

@ -1,5 +1,7 @@
import { mapAsset } from 'src/dtos/asset-response.dto';
import { IAssetRepository } from 'src/interfaces/asset.interface';
import { IAssetRepository, WithoutProperty } from 'src/interfaces/asset.interface';
import { ICryptoRepository } from 'src/interfaces/crypto.interface';
import { IJobRepository, JobName, JobStatus } from 'src/interfaces/job.interface';
import { ILoggerRepository } from 'src/interfaces/logger.interface';
import { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';
import { IMetadataRepository } from 'src/interfaces/metadata.interface';
@ -12,6 +14,8 @@ import { assetStub } from 'test/fixtures/asset.stub';
import { authStub } from 'test/fixtures/auth.stub';
import { personStub } from 'test/fixtures/person.stub';
import { newAssetRepositoryMock } from 'test/repositories/asset.repository.mock';
import { newCryptoRepositoryMock } from 'test/repositories/crypto.repository.mock';
import { newJobRepositoryMock } from 'test/repositories/job.repository.mock';
import { newLoggerRepositoryMock } from 'test/repositories/logger.repository.mock';
import { newMachineLearningRepositoryMock } from 'test/repositories/machine-learning.repository.mock';
import { newMetadataRepositoryMock } from 'test/repositories/metadata.repository.mock';
@ -19,7 +23,7 @@ import { newPartnerRepositoryMock } from 'test/repositories/partner.repository.m
import { newPersonRepositoryMock } from 'test/repositories/person.repository.mock';
import { newSearchRepositoryMock } from 'test/repositories/search.repository.mock';
import { newSystemMetadataRepositoryMock } from 'test/repositories/system-metadata.repository.mock';
import { Mocked, vitest } from 'vitest';
import { Mocked, beforeEach, vitest } from 'vitest';
vitest.useFakeTimers();
@ -33,6 +37,8 @@ describe(SearchService.name, () => {
let partnerMock: Mocked<IPartnerRepository>;
let metadataMock: Mocked<IMetadataRepository>;
let loggerMock: Mocked<ILoggerRepository>;
let cryptoMock: Mocked<ICryptoRepository>;
let jobMock: Mocked<IJobRepository>;
beforeEach(() => {
assetMock = newAssetRepositoryMock();
@ -43,6 +49,8 @@ describe(SearchService.name, () => {
partnerMock = newPartnerRepositoryMock();
metadataMock = newMetadataRepositoryMock();
loggerMock = newLoggerRepositoryMock();
cryptoMock = newCryptoRepositoryMock();
jobMock = newJobRepositoryMock();
sut = new SearchService(
systemMock,
@ -53,6 +61,8 @@ describe(SearchService.name, () => {
partnerMock,
metadataMock,
loggerMock,
cryptoMock,
jobMock,
);
});
@ -76,15 +86,15 @@ describe(SearchService.name, () => {
describe('getExploreData', () => {
it('should get assets by city and tag', async () => {
assetMock.getAssetIdByCity.mockResolvedValueOnce({
assetMock.getAssetIdByCity.mockResolvedValue({
fieldName: 'exifInfo.city',
items: [{ value: 'Paris', data: assetStub.image.id }],
});
assetMock.getAssetIdByTag.mockResolvedValueOnce({
assetMock.getAssetIdByTag.mockResolvedValue({
fieldName: 'smartInfo.tags',
items: [{ value: 'train', data: assetStub.imageFrom2015.id }],
});
assetMock.getByIdsWithAllRelations.mockResolvedValueOnce([assetStub.image, assetStub.imageFrom2015]);
assetMock.getByIdsWithAllRelations.mockResolvedValue([assetStub.image, assetStub.imageFrom2015]);
const expectedResponse = [
{ fieldName: 'exifInfo.city', items: [{ value: 'Paris', data: mapAsset(assetStub.image) }] },
{ fieldName: 'smartInfo.tags', items: [{ value: 'train', data: mapAsset(assetStub.imageFrom2015) }] },
@ -95,4 +105,234 @@ describe(SearchService.name, () => {
expect(result).toEqual(expectedResponse);
});
});
describe('handleQueueSearchDuplicates', () => {
beforeEach(() => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: true,
duplicateDetection: {
enabled: true,
},
},
});
});
it('should skip if machine learning is disabled', async () => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: false,
duplicateDetection: {
enabled: true,
},
},
});
await expect(sut.handleQueueSearchDuplicates({})).resolves.toBe(JobStatus.SKIPPED);
expect(jobMock.queue).not.toHaveBeenCalled();
expect(jobMock.queueAll).not.toHaveBeenCalled();
expect(systemMock.get).toHaveBeenCalled();
});
it('should skip if duplicate detection is disabled', async () => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: true,
duplicateDetection: {
enabled: false,
},
},
});
await expect(sut.handleQueueSearchDuplicates({})).resolves.toBe(JobStatus.SKIPPED);
expect(jobMock.queue).not.toHaveBeenCalled();
expect(jobMock.queueAll).not.toHaveBeenCalled();
expect(systemMock.get).toHaveBeenCalled();
});
it('should queue missing assets', async () => {
assetMock.getWithout.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
await sut.handleQueueSearchDuplicates({});
expect(assetMock.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.DUPLICATE);
expect(jobMock.queueAll).toHaveBeenCalledWith([
{
name: JobName.DUPLICATE_DETECTION,
data: { id: assetStub.image.id },
},
]);
});
it('should queue all assets', async () => {
assetMock.getAll.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
personMock.getAll.mockResolvedValue({
items: [personStub.withName],
hasNextPage: false,
});
await sut.handleQueueSearchDuplicates({ force: true });
expect(assetMock.getAll).toHaveBeenCalled();
expect(jobMock.queueAll).toHaveBeenCalledWith([
{
name: JobName.DUPLICATE_DETECTION,
data: { id: assetStub.image.id },
},
]);
});
});
describe('handleSearchDuplicates', () => {
beforeEach(() => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: true,
duplicateDetection: {
enabled: true,
},
},
});
});
it('should skip if machine learning is disabled', async () => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: false,
duplicateDetection: {
enabled: true,
},
},
});
const id = assetStub.livePhotoMotionAsset.id;
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
const result = await sut.handleSearchDuplicates({ id });
expect(result).toBe(JobStatus.SKIPPED);
});
it('should skip if duplicate detection is disabled', async () => {
systemMock.get.mockResolvedValue({
machineLearning: {
enabled: true,
duplicateDetection: {
enabled: false,
},
},
});
const id = assetStub.livePhotoMotionAsset.id;
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
const result = await sut.handleSearchDuplicates({ id });
expect(result).toBe(JobStatus.SKIPPED);
});
it('should fail if asset is not found', async () => {
const result = await sut.handleSearchDuplicates({ id: assetStub.image.id });
expect(result).toBe(JobStatus.FAILED);
expect(loggerMock.error).toHaveBeenCalledWith(`Asset ${assetStub.image.id} not found`);
});
it('should skip if asset is not visible', async () => {
const id = assetStub.livePhotoMotionAsset.id;
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
const result = await sut.handleSearchDuplicates({ id });
expect(result).toBe(JobStatus.SKIPPED);
expect(loggerMock.debug).toHaveBeenCalledWith(`Asset ${id} is not visible, skipping`);
});
it('should fail if asset is missing preview image', async () => {
assetMock.getById.mockResolvedValue(assetStub.noResizePath);
const result = await sut.handleSearchDuplicates({ id: assetStub.noResizePath.id });
expect(result).toBe(JobStatus.FAILED);
expect(loggerMock.warn).toHaveBeenCalledWith(`Asset ${assetStub.noResizePath.id} is missing preview image`);
});
it('should fail if asset is missing embedding', async () => {
assetMock.getById.mockResolvedValue(assetStub.image);
const result = await sut.handleSearchDuplicates({ id: assetStub.image.id });
expect(result).toBe(JobStatus.FAILED);
expect(loggerMock.debug).toHaveBeenCalledWith(`Asset ${assetStub.image.id} is missing embedding`);
});
it('should search for duplicates and update asset with duplicateId', async () => {
assetMock.getById.mockResolvedValue(assetStub.hasEmbedding);
searchMock.searchDuplicates.mockResolvedValue([
{ assetId: assetStub.image.id, distance: 0.01, duplicateId: null },
]);
const expectedAssetIds = [assetStub.image.id, assetStub.hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(searchMock.searchDuplicates).toHaveBeenCalledWith({
assetId: assetStub.hasEmbedding.id,
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
maxDistance: 0.03,
userIds: [assetStub.hasEmbedding.ownerId],
});
expect(assetMock.updateDuplicates).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
targetDuplicateId: expect.any(String),
duplicateIds: [],
});
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith(
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
);
});
it('should use existing duplicate ID among matched duplicates', async () => {
const duplicateId = assetStub.hasDupe.duplicateId;
assetMock.getById.mockResolvedValue(assetStub.hasEmbedding);
searchMock.searchDuplicates.mockResolvedValue([{ assetId: assetStub.hasDupe.id, distance: 0.01, duplicateId }]);
const expectedAssetIds = [assetStub.hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(searchMock.searchDuplicates).toHaveBeenCalledWith({
assetId: assetStub.hasEmbedding.id,
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
maxDistance: 0.03,
userIds: [assetStub.hasEmbedding.ownerId],
});
expect(assetMock.updateDuplicates).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
targetDuplicateId: assetStub.hasDupe.duplicateId,
duplicateIds: [],
});
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith(
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
);
});
it('should remove duplicateId if no duplicates found and asset has duplicateId', async () => {
assetMock.getById.mockResolvedValue(assetStub.hasDupe);
searchMock.searchDuplicates.mockResolvedValue([]);
const result = await sut.handleSearchDuplicates({ id: assetStub.hasDupe.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(assetMock.update).toHaveBeenCalledWith({ id: assetStub.hasDupe.id, duplicateId: null });
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith({
assetId: assetStub.hasDupe.id,
duplicatesDetectedAt: expect.any(Date),
});
});
});
});

View file

@ -16,15 +16,25 @@ import {
} from 'src/dtos/search.dto';
import { AssetOrder } from 'src/entities/album.entity';
import { AssetEntity } from 'src/entities/asset.entity';
import { IAssetRepository } from 'src/interfaces/asset.interface';
import { IAssetRepository, WithoutProperty } from 'src/interfaces/asset.interface';
import { ICryptoRepository } from 'src/interfaces/crypto.interface';
import {
IBaseJob,
IEntityJob,
IJobRepository,
JOBS_ASSET_PAGINATION_SIZE,
JobName,
JobStatus,
} from 'src/interfaces/job.interface';
import { ILoggerRepository } from 'src/interfaces/logger.interface';
import { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';
import { IMetadataRepository } from 'src/interfaces/metadata.interface';
import { IPartnerRepository } from 'src/interfaces/partner.interface';
import { IPersonRepository } from 'src/interfaces/person.interface';
import { ISearchRepository, SearchExploreItem } from 'src/interfaces/search.interface';
import { AssetDuplicateResult, ISearchRepository, SearchExploreItem } from 'src/interfaces/search.interface';
import { ISystemMetadataRepository } from 'src/interfaces/system-metadata.interface';
import { isSmartSearchEnabled } from 'src/utils/misc';
import { isDuplicateDetectionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import { usePagination } from 'src/utils/pagination';
@Injectable()
export class SearchService {
@ -39,6 +49,8 @@ export class SearchService {
@Inject(IPartnerRepository) private partnerRepository: IPartnerRepository,
@Inject(IMetadataRepository) private metadataRepository: IMetadataRepository,
@Inject(ILoggerRepository) private logger: ILoggerRepository,
@Inject(ICryptoRepository) private cryptoRepository: ICryptoRepository,
@Inject(IJobRepository) private jobRepository: IJobRepository,
) {
this.logger.setContext(SearchService.name);
this.configCore = SystemConfigCore.create(systemMetadataRepository, logger);
@ -147,6 +159,97 @@ export class SearchService {
}
}
async handleQueueSearchDuplicates({ force }: IBaseJob): Promise<JobStatus> {
const { machineLearning } = await this.configCore.getConfig();
if (!isDuplicateDetectionEnabled(machineLearning)) {
return JobStatus.SKIPPED;
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
return force
? this.assetRepository.getAll(pagination, { isVisible: true })
: this.assetRepository.getWithout(pagination, WithoutProperty.DUPLICATE);
});
for await (const assets of assetPagination) {
await this.jobRepository.queueAll(
assets.map((asset) => ({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } })),
);
}
return JobStatus.SUCCESS;
}
async handleSearchDuplicates({ id }: IEntityJob): Promise<JobStatus> {
const { machineLearning } = await this.configCore.getConfig();
if (!isDuplicateDetectionEnabled(machineLearning)) {
return JobStatus.SKIPPED;
}
const asset = await this.assetRepository.getById(id, { smartSearch: true });
if (!asset) {
this.logger.error(`Asset ${id} not found`);
return JobStatus.FAILED;
}
if (!asset.isVisible) {
this.logger.debug(`Asset ${id} is not visible, skipping`);
return JobStatus.SKIPPED;
}
if (!asset.previewPath) {
this.logger.warn(`Asset ${id} is missing preview image`);
return JobStatus.FAILED;
}
if (!asset.smartSearch?.embedding) {
this.logger.debug(`Asset ${id} is missing embedding`);
return JobStatus.FAILED;
}
const duplicateAssets = await this.searchRepository.searchDuplicates({
assetId: asset.id,
embedding: asset.smartSearch.embedding,
maxDistance: machineLearning.duplicateDetection.maxDistance,
userIds: [asset.ownerId],
});
let assetIds = [asset.id];
if (duplicateAssets.length > 0) {
this.logger.debug(
`Found ${duplicateAssets.length} duplicate${duplicateAssets.length === 1 ? '' : 's'} for asset ${asset.id}`,
);
assetIds = await this.updateDuplicates(asset, duplicateAssets);
} else if (asset.duplicateId) {
this.logger.debug(`No duplicates found for asset ${asset.id}, removing duplicateId`);
await this.assetRepository.update({ id: asset.id, duplicateId: null });
}
const duplicatesDetectedAt = new Date();
await this.assetRepository.upsertJobStatus(...assetIds.map((assetId) => ({ assetId, duplicatesDetectedAt })));
return JobStatus.SUCCESS;
}
private async updateDuplicates(asset: AssetEntity, duplicateAssets: AssetDuplicateResult[]): Promise<string[]> {
const duplicateIds = [
...new Set(
duplicateAssets
.filter((asset): asset is AssetDuplicateResult & { duplicateId: string } => !!asset.duplicateId)
.map((duplicate) => duplicate.duplicateId),
),
];
const targetDuplicateId = asset.duplicateId ?? duplicateIds.shift() ?? this.cryptoRepository.randomUUID();
const assetIdsToUpdate = duplicateAssets
.filter((asset) => asset.duplicateId !== targetDuplicateId)
.map((duplicate) => duplicate.assetId);
assetIdsToUpdate.push(asset.id);
await this.assetRepository.updateDuplicates({ targetDuplicateId, assetIds: assetIdsToUpdate, duplicateIds });
return assetIdsToUpdate;
}
private async getUserIdsToSearch(auth: AuthDto): Promise<string[]> {
const userIds: string[] = [auth.user.id];
const partners = await this.partnerRepository.getAll(auth.user.id);

View file

@ -164,6 +164,7 @@ describe(ServerInfoService.name, () => {
it('should respond the server features', async () => {
await expect(sut.getFeatures()).resolves.toEqual({
smartSearch: true,
duplicateDetection: false,
facialRecognition: true,
map: true,
reverseGeocoding: true,

View file

@ -22,7 +22,7 @@ import { ISystemMetadataRepository } from 'src/interfaces/system-metadata.interf
import { IUserRepository, UserStatsQueryResponse } from 'src/interfaces/user.interface';
import { asHumanReadable } from 'src/utils/bytes';
import { mimeTypes } from 'src/utils/mime-types';
import { isFacialRecognitionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import { isDuplicateDetectionEnabled, isFacialRecognitionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import { Version } from 'src/utils/version';
@Injectable()
@ -88,6 +88,7 @@ export class ServerInfoService {
return {
smartSearch: isSmartSearchEnabled(machineLearning),
facialRecognition: isFacialRecognitionEnabled(machineLearning),
duplicateDetection: isDuplicateDetectionEnabled(machineLearning),
map: map.enabled,
reverseGeocoding: reverseGeocoding.enabled,
sidecar: true,

View file

@ -79,6 +79,10 @@ const updatedConfig = Object.freeze<SystemConfig>({
enabled: true,
modelName: 'ViT-B-32__openai',
},
duplicateDetection: {
enabled: false,
maxDistance: 0.03,
},
facialRecognition: {
enabled: true,
modelName: 'buffalo_l',

View file

@ -62,6 +62,8 @@ export const isSmartSearchEnabled = (machineLearning: SystemConfig['machineLearn
isMachineLearningEnabled(machineLearning) && machineLearning.clip.enabled;
export const isFacialRecognitionEnabled = (machineLearning: SystemConfig['machineLearning']) =>
isMachineLearningEnabled(machineLearning) && machineLearning.facialRecognition.enabled;
export const isDuplicateDetectionEnabled = (machineLearning: SystemConfig['machineLearning']) =>
isMachineLearningEnabled(machineLearning) && machineLearning.duplicateDetection.enabled;
export const isConnectionAborted = (error: Error | any) => error.code === 'ECONNABORTED';