mirror of
https://github.com/immich-app/immich
synced 2025-11-14 17:36:12 +00:00
feat(server): near-duplicate detection (#8228)
* duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
parent
673e97e71d
commit
64636c0618
61 changed files with 1254 additions and 61 deletions
|
|
@ -30,6 +30,7 @@ SELECT
|
|||
"entity"."originalFileName" AS "entity_originalFileName",
|
||||
"entity"."sidecarPath" AS "entity_sidecarPath",
|
||||
"entity"."stackId" AS "entity_stackId",
|
||||
"entity"."duplicateId" AS "entity_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
|
|
@ -111,7 +112,8 @@ SELECT
|
|||
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
|
||||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId"
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
WHERE
|
||||
|
|
@ -147,6 +149,7 @@ SELECT
|
|||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId",
|
||||
"AssetEntity__AssetEntity_exifInfo"."assetId" AS "AssetEntity__AssetEntity_exifInfo_assetId",
|
||||
"AssetEntity__AssetEntity_exifInfo"."description" AS "AssetEntity__AssetEntity_exifInfo_description",
|
||||
"AssetEntity__AssetEntity_exifInfo"."exifImageWidth" AS "AssetEntity__AssetEntity_exifInfo_exifImageWidth",
|
||||
|
|
@ -230,7 +233,8 @@ SELECT
|
|||
"bd93d5747511a4dad4923546c51365bf1a803774"."livePhotoVideoId" AS "bd93d5747511a4dad4923546c51365bf1a803774_livePhotoVideoId",
|
||||
"bd93d5747511a4dad4923546c51365bf1a803774"."originalFileName" AS "bd93d5747511a4dad4923546c51365bf1a803774_originalFileName",
|
||||
"bd93d5747511a4dad4923546c51365bf1a803774"."sidecarPath" AS "bd93d5747511a4dad4923546c51365bf1a803774_sidecarPath",
|
||||
"bd93d5747511a4dad4923546c51365bf1a803774"."stackId" AS "bd93d5747511a4dad4923546c51365bf1a803774_stackId"
|
||||
"bd93d5747511a4dad4923546c51365bf1a803774"."stackId" AS "bd93d5747511a4dad4923546c51365bf1a803774_stackId",
|
||||
"bd93d5747511a4dad4923546c51365bf1a803774"."duplicateId" AS "bd93d5747511a4dad4923546c51365bf1a803774_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
LEFT JOIN "exif" "AssetEntity__AssetEntity_exifInfo" ON "AssetEntity__AssetEntity_exifInfo"."assetId" = "AssetEntity"."id"
|
||||
|
|
@ -311,7 +315,8 @@ FROM
|
|||
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
|
||||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId"
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
LEFT JOIN "libraries" "AssetEntity__AssetEntity_library" ON "AssetEntity__AssetEntity_library"."id" = "AssetEntity"."libraryId"
|
||||
|
|
@ -407,7 +412,8 @@ SELECT
|
|||
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
|
||||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId"
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
WHERE
|
||||
|
|
@ -423,6 +429,15 @@ SET
|
|||
WHERE
|
||||
"id" IN ($2)
|
||||
|
||||
-- AssetRepository.updateDuplicates
|
||||
UPDATE "assets"
|
||||
SET
|
||||
"duplicateId" = $1,
|
||||
"updatedAt" = CURRENT_TIMESTAMP
|
||||
WHERE
|
||||
"duplicateId" IN ($2)
|
||||
OR "id" IN ($3)
|
||||
|
||||
-- AssetRepository.getByChecksum
|
||||
SELECT
|
||||
"AssetEntity"."id" AS "AssetEntity_id",
|
||||
|
|
@ -452,7 +467,8 @@ SELECT
|
|||
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
|
||||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId"
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
WHERE
|
||||
|
|
@ -519,7 +535,8 @@ SELECT
|
|||
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
|
||||
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
|
||||
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId"
|
||||
"AssetEntity"."stackId" AS "AssetEntity_stackId",
|
||||
"AssetEntity"."duplicateId" AS "AssetEntity_duplicateId"
|
||||
FROM
|
||||
"assets" "AssetEntity"
|
||||
WHERE
|
||||
|
|
@ -575,6 +592,7 @@ SELECT
|
|||
"asset"."originalFileName" AS "asset_originalFileName",
|
||||
"asset"."sidecarPath" AS "asset_sidecarPath",
|
||||
"asset"."stackId" AS "asset_stackId",
|
||||
"asset"."duplicateId" AS "asset_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
|
|
@ -632,7 +650,8 @@ SELECT
|
|||
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
|
||||
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
|
||||
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId"
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId",
|
||||
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
|
||||
FROM
|
||||
"assets" "asset"
|
||||
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
|
||||
|
|
@ -713,6 +732,7 @@ SELECT
|
|||
"asset"."originalFileName" AS "asset_originalFileName",
|
||||
"asset"."sidecarPath" AS "asset_sidecarPath",
|
||||
"asset"."stackId" AS "asset_stackId",
|
||||
"asset"."duplicateId" AS "asset_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
|
|
@ -770,7 +790,8 @@ SELECT
|
|||
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
|
||||
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
|
||||
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId"
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId",
|
||||
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
|
||||
FROM
|
||||
"assets" "asset"
|
||||
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
|
||||
|
|
@ -797,6 +818,112 @@ ORDER BY
|
|||
)::timestamptz DESC,
|
||||
"asset"."fileCreatedAt" DESC
|
||||
|
||||
-- AssetRepository.getDuplicates
|
||||
SELECT
|
||||
"asset"."id" AS "asset_id",
|
||||
"asset"."deviceAssetId" AS "asset_deviceAssetId",
|
||||
"asset"."ownerId" AS "asset_ownerId",
|
||||
"asset"."libraryId" AS "asset_libraryId",
|
||||
"asset"."deviceId" AS "asset_deviceId",
|
||||
"asset"."type" AS "asset_type",
|
||||
"asset"."originalPath" AS "asset_originalPath",
|
||||
"asset"."previewPath" AS "asset_previewPath",
|
||||
"asset"."thumbnailPath" AS "asset_thumbnailPath",
|
||||
"asset"."thumbhash" AS "asset_thumbhash",
|
||||
"asset"."encodedVideoPath" AS "asset_encodedVideoPath",
|
||||
"asset"."createdAt" AS "asset_createdAt",
|
||||
"asset"."updatedAt" AS "asset_updatedAt",
|
||||
"asset"."deletedAt" AS "asset_deletedAt",
|
||||
"asset"."fileCreatedAt" AS "asset_fileCreatedAt",
|
||||
"asset"."localDateTime" AS "asset_localDateTime",
|
||||
"asset"."fileModifiedAt" AS "asset_fileModifiedAt",
|
||||
"asset"."isFavorite" AS "asset_isFavorite",
|
||||
"asset"."isArchived" AS "asset_isArchived",
|
||||
"asset"."isExternal" AS "asset_isExternal",
|
||||
"asset"."isOffline" AS "asset_isOffline",
|
||||
"asset"."checksum" AS "asset_checksum",
|
||||
"asset"."duration" AS "asset_duration",
|
||||
"asset"."isVisible" AS "asset_isVisible",
|
||||
"asset"."livePhotoVideoId" AS "asset_livePhotoVideoId",
|
||||
"asset"."originalFileName" AS "asset_originalFileName",
|
||||
"asset"."sidecarPath" AS "asset_sidecarPath",
|
||||
"asset"."stackId" AS "asset_stackId",
|
||||
"asset"."duplicateId" AS "asset_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
"exifInfo"."exifImageHeight" AS "exifInfo_exifImageHeight",
|
||||
"exifInfo"."fileSizeInByte" AS "exifInfo_fileSizeInByte",
|
||||
"exifInfo"."orientation" AS "exifInfo_orientation",
|
||||
"exifInfo"."dateTimeOriginal" AS "exifInfo_dateTimeOriginal",
|
||||
"exifInfo"."modifyDate" AS "exifInfo_modifyDate",
|
||||
"exifInfo"."timeZone" AS "exifInfo_timeZone",
|
||||
"exifInfo"."latitude" AS "exifInfo_latitude",
|
||||
"exifInfo"."longitude" AS "exifInfo_longitude",
|
||||
"exifInfo"."projectionType" AS "exifInfo_projectionType",
|
||||
"exifInfo"."city" AS "exifInfo_city",
|
||||
"exifInfo"."livePhotoCID" AS "exifInfo_livePhotoCID",
|
||||
"exifInfo"."autoStackId" AS "exifInfo_autoStackId",
|
||||
"exifInfo"."state" AS "exifInfo_state",
|
||||
"exifInfo"."country" AS "exifInfo_country",
|
||||
"exifInfo"."make" AS "exifInfo_make",
|
||||
"exifInfo"."model" AS "exifInfo_model",
|
||||
"exifInfo"."lensModel" AS "exifInfo_lensModel",
|
||||
"exifInfo"."fNumber" AS "exifInfo_fNumber",
|
||||
"exifInfo"."focalLength" AS "exifInfo_focalLength",
|
||||
"exifInfo"."iso" AS "exifInfo_iso",
|
||||
"exifInfo"."exposureTime" AS "exifInfo_exposureTime",
|
||||
"exifInfo"."profileDescription" AS "exifInfo_profileDescription",
|
||||
"exifInfo"."colorspace" AS "exifInfo_colorspace",
|
||||
"exifInfo"."bitsPerSample" AS "exifInfo_bitsPerSample",
|
||||
"exifInfo"."fps" AS "exifInfo_fps",
|
||||
"stack"."id" AS "stack_id",
|
||||
"stack"."primaryAssetId" AS "stack_primaryAssetId",
|
||||
"stackedAssets"."id" AS "stackedAssets_id",
|
||||
"stackedAssets"."deviceAssetId" AS "stackedAssets_deviceAssetId",
|
||||
"stackedAssets"."ownerId" AS "stackedAssets_ownerId",
|
||||
"stackedAssets"."libraryId" AS "stackedAssets_libraryId",
|
||||
"stackedAssets"."deviceId" AS "stackedAssets_deviceId",
|
||||
"stackedAssets"."type" AS "stackedAssets_type",
|
||||
"stackedAssets"."originalPath" AS "stackedAssets_originalPath",
|
||||
"stackedAssets"."previewPath" AS "stackedAssets_previewPath",
|
||||
"stackedAssets"."thumbnailPath" AS "stackedAssets_thumbnailPath",
|
||||
"stackedAssets"."thumbhash" AS "stackedAssets_thumbhash",
|
||||
"stackedAssets"."encodedVideoPath" AS "stackedAssets_encodedVideoPath",
|
||||
"stackedAssets"."createdAt" AS "stackedAssets_createdAt",
|
||||
"stackedAssets"."updatedAt" AS "stackedAssets_updatedAt",
|
||||
"stackedAssets"."deletedAt" AS "stackedAssets_deletedAt",
|
||||
"stackedAssets"."fileCreatedAt" AS "stackedAssets_fileCreatedAt",
|
||||
"stackedAssets"."localDateTime" AS "stackedAssets_localDateTime",
|
||||
"stackedAssets"."fileModifiedAt" AS "stackedAssets_fileModifiedAt",
|
||||
"stackedAssets"."isFavorite" AS "stackedAssets_isFavorite",
|
||||
"stackedAssets"."isArchived" AS "stackedAssets_isArchived",
|
||||
"stackedAssets"."isExternal" AS "stackedAssets_isExternal",
|
||||
"stackedAssets"."isOffline" AS "stackedAssets_isOffline",
|
||||
"stackedAssets"."checksum" AS "stackedAssets_checksum",
|
||||
"stackedAssets"."duration" AS "stackedAssets_duration",
|
||||
"stackedAssets"."isVisible" AS "stackedAssets_isVisible",
|
||||
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
|
||||
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
|
||||
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId",
|
||||
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
|
||||
FROM
|
||||
"assets" "asset"
|
||||
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
|
||||
LEFT JOIN "asset_stack" "stack" ON "stack"."id" = "asset"."stackId"
|
||||
LEFT JOIN "assets" "stackedAssets" ON "stackedAssets"."stackId" = "stack"."id"
|
||||
AND ("stackedAssets"."deletedAt" IS NULL)
|
||||
WHERE
|
||||
(
|
||||
"asset"."isVisible" = true
|
||||
AND "asset"."ownerId" IN ($1, $2)
|
||||
AND "asset"."duplicateId" IS NOT NULL
|
||||
)
|
||||
AND ("asset"."deletedAt" IS NULL)
|
||||
ORDER BY
|
||||
"asset"."duplicateId" ASC
|
||||
|
||||
-- AssetRepository.getAssetIdByCity
|
||||
WITH
|
||||
"cities" AS (
|
||||
|
|
@ -887,6 +1014,7 @@ SELECT
|
|||
"asset"."originalFileName" AS "asset_originalFileName",
|
||||
"asset"."sidecarPath" AS "asset_sidecarPath",
|
||||
"asset"."stackId" AS "asset_stackId",
|
||||
"asset"."duplicateId" AS "asset_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
|
|
@ -944,7 +1072,8 @@ SELECT
|
|||
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
|
||||
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
|
||||
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId"
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId",
|
||||
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
|
||||
FROM
|
||||
"assets" "asset"
|
||||
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
|
||||
|
|
@ -992,6 +1121,7 @@ SELECT
|
|||
"asset"."originalFileName" AS "asset_originalFileName",
|
||||
"asset"."sidecarPath" AS "asset_sidecarPath",
|
||||
"asset"."stackId" AS "asset_stackId",
|
||||
"asset"."duplicateId" AS "asset_duplicateId",
|
||||
"exifInfo"."assetId" AS "exifInfo_assetId",
|
||||
"exifInfo"."description" AS "exifInfo_description",
|
||||
"exifInfo"."exifImageWidth" AS "exifInfo_exifImageWidth",
|
||||
|
|
@ -1049,7 +1179,8 @@ SELECT
|
|||
"stackedAssets"."livePhotoVideoId" AS "stackedAssets_livePhotoVideoId",
|
||||
"stackedAssets"."originalFileName" AS "stackedAssets_originalFileName",
|
||||
"stackedAssets"."sidecarPath" AS "stackedAssets_sidecarPath",
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId"
|
||||
"stackedAssets"."stackId" AS "stackedAssets_stackId",
|
||||
"stackedAssets"."duplicateId" AS "stackedAssets_duplicateId"
|
||||
FROM
|
||||
"assets" "asset"
|
||||
LEFT JOIN "exif" "exifInfo" ON "exifInfo"."assetId" = "asset"."id"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue