feat(server): near-duplicate detection (#8228)

* duplicate detection job, entity, config

* queueing

* job panel, update api

* use embedding in db instead of fetching

* disable concurrency

* only queue visible assets

* handle multiple duplicateIds

* update concurrent queue check

* add provider

* add web placeholder, server endpoint, migration, various fixes

* update sql

* select embedding by default

* rename variable

* simplify

* remove separate entity, handle re-running with different threshold, set default back to 0.02

* fix tests

* add tests

* add index to entity

* formatting

* update asset mock

* fix `upsertJobStatus` signature

* update sql

* formatting

* default to 0.03

* optimize clustering

* use asset's `duplicateId` if present

* update sql

* update tests

* expose admin setting

* refactor

* formatting

* skip if ml is disabled

* debug trash e2e

* remove from web

* remove from sidebar

* test if ml is disabled

* update sql

* separate duplicate detection from clip in config, disable by default for now

* fix doc

* lower minimum `maxDistance`

* update api

* Add and Use Duplicate Detection Feature Flag (#9364)

* Add Duplicate Detection Flag

* Use Duplicate Detection Flag

* Attempt Fixes for Failing Checks

* lower minimum `maxDistance`

* fix tests

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>

* chore: fixes and additions after rebase

* chore: update api (remove new Role enum)

* fix: left join smart search so getAll works without machine learning

* test: trash e2e go back to checking length of assets is zero

* chore: regen api after rebase

* test: fix tests after rebase

* redundant join

---------

Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com>
Co-authored-by: Zack Pollard <zackpollard@ymail.com>
Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
Mert 2024-05-16 13:08:37 -04:00 committed by GitHub
parent 673e97e71d
commit 64636c0618
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 1254 additions and 61 deletions

View file

@ -14,6 +14,7 @@ class AllJobStatusResponseDto {
/// Returns a new [AllJobStatusResponseDto] instance.
AllJobStatusResponseDto({
required this.backgroundTask,
required this.duplicateDetection,
required this.faceDetection,
required this.facialRecognition,
required this.library_,
@ -30,6 +31,8 @@ class AllJobStatusResponseDto {
JobStatusDto backgroundTask;
JobStatusDto duplicateDetection;
JobStatusDto faceDetection;
JobStatusDto facialRecognition;
@ -57,6 +60,7 @@ class AllJobStatusResponseDto {
@override
bool operator ==(Object other) => identical(this, other) || other is AllJobStatusResponseDto &&
other.backgroundTask == backgroundTask &&
other.duplicateDetection == duplicateDetection &&
other.faceDetection == faceDetection &&
other.facialRecognition == facialRecognition &&
other.library_ == library_ &&
@ -74,6 +78,7 @@ class AllJobStatusResponseDto {
int get hashCode =>
// ignore: unnecessary_parenthesis
(backgroundTask.hashCode) +
(duplicateDetection.hashCode) +
(faceDetection.hashCode) +
(facialRecognition.hashCode) +
(library_.hashCode) +
@ -88,11 +93,12 @@ class AllJobStatusResponseDto {
(videoConversion.hashCode);
@override
String toString() => 'AllJobStatusResponseDto[backgroundTask=$backgroundTask, faceDetection=$faceDetection, facialRecognition=$facialRecognition, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, storageTemplateMigration=$storageTemplateMigration, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
String toString() => 'AllJobStatusResponseDto[backgroundTask=$backgroundTask, duplicateDetection=$duplicateDetection, faceDetection=$faceDetection, facialRecognition=$facialRecognition, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, storageTemplateMigration=$storageTemplateMigration, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
json[r'backgroundTask'] = this.backgroundTask;
json[r'duplicateDetection'] = this.duplicateDetection;
json[r'faceDetection'] = this.faceDetection;
json[r'facialRecognition'] = this.facialRecognition;
json[r'library'] = this.library_;
@ -117,6 +123,7 @@ class AllJobStatusResponseDto {
return AllJobStatusResponseDto(
backgroundTask: JobStatusDto.fromJson(json[r'backgroundTask'])!,
duplicateDetection: JobStatusDto.fromJson(json[r'duplicateDetection'])!,
faceDetection: JobStatusDto.fromJson(json[r'faceDetection'])!,
facialRecognition: JobStatusDto.fromJson(json[r'facialRecognition'])!,
library_: JobStatusDto.fromJson(json[r'library'])!,
@ -177,6 +184,7 @@ class AllJobStatusResponseDto {
/// The list of required keys that must be present in a JSON.
static const requiredKeys = <String>{
'backgroundTask',
'duplicateDetection',
'faceDetection',
'facialRecognition',
'library',

View file

@ -0,0 +1,108 @@
//
// AUTO-GENERATED FILE, DO NOT MODIFY!
//
// @dart=2.18
// ignore_for_file: unused_element, unused_import
// ignore_for_file: always_put_required_named_parameters_first
// ignore_for_file: constant_identifier_names
// ignore_for_file: lines_longer_than_80_chars
part of openapi.api;
/// Configuration object for duplicate detection, exchanged as a JSON map with
/// the keys `enabled` and `maxDistance`.
class DuplicateDetectionConfig {
  /// Returns a new [DuplicateDetectionConfig] instance.
  DuplicateDetectionConfig({
    required this.enabled,
    required this.maxDistance,
  });

  /// Whether duplicate detection is enabled.
  bool enabled;

  /// Distance threshold used by duplicate detection.
  ///
  /// NOTE(review): presumably the largest distance at which two assets are
  /// still treated as duplicates - confirm against the server implementation.
  ///
  /// Minimum value: 0.001
  /// Maximum value: 0.1
  double maxDistance;

  /// Two configs are equal when both [enabled] and [maxDistance] match.
  @override
  bool operator ==(Object other) => identical(this, other) || other is DuplicateDetectionConfig &&
    other.enabled == enabled &&
    other.maxDistance == maxDistance;

  /// Combines the hash codes of the fields compared by [operator ==].
  @override
  int get hashCode =>
    // ignore: unnecessary_parenthesis
    (enabled.hashCode) +
    (maxDistance.hashCode);

  @override
  String toString() => 'DuplicateDetectionConfig[enabled=$enabled, maxDistance=$maxDistance]';

  /// Serializes this instance to a JSON-compatible map keyed by `enabled` and
  /// `maxDistance`.
  Map<String, dynamic> toJson() {
    final json = <String, dynamic>{};
    json[r'enabled'] = this.enabled;
    json[r'maxDistance'] = this.maxDistance;
    return json;
  }

  /// Returns a new [DuplicateDetectionConfig] instance and imports its values from
  /// [value] if it's a [Map], null otherwise.
  ///
  /// The `!` assertions throw when the helper yields null for `enabled` or
  /// `maxDistance`.
  // ignore: prefer_constructors_over_static_methods
  static DuplicateDetectionConfig? fromJson(dynamic value) {
    if (value is Map) {
      final json = value.cast<String, dynamic>();

      return DuplicateDetectionConfig(
        enabled: mapValueOfType<bool>(json, r'enabled')!,
        // A JSON number without a fractional part is decoded as `int` on the
        // Dart VM, so asking for `double` directly would reject it. Read the
        // value as `num` and widen it instead.
        // NOTE(review): assumes `mapValueOfType<T>` returns the entry only
        // when it `is T` - confirm against the shared API helper.
        maxDistance: (mapValueOfType<num>(json, r'maxDistance')!).toDouble(),
      );
    }
    return null;
  }

  /// Converts a JSON list into a list of [DuplicateDetectionConfig].
  ///
  /// Entries for which [fromJson] returns null are dropped. Anything that is
  /// not a non-empty [List] yields an empty list. [growable] is forwarded to
  /// the final `toList` call.
  static List<DuplicateDetectionConfig> listFromJson(dynamic json, {bool growable = false,}) {
    final result = <DuplicateDetectionConfig>[];
    if (json is List && json.isNotEmpty) {
      for (final row in json) {
        final value = DuplicateDetectionConfig.fromJson(row);
        if (value != null) {
          result.add(value);
        }
      }
    }
    return result.toList(growable: growable);
  }

  /// Converts a JSON object whose values are config objects into a Dart map.
  ///
  /// Entries for which [fromJson] returns null are dropped. Anything that is
  /// not a non-empty [Map] yields an empty map.
  static Map<String, DuplicateDetectionConfig> mapFromJson(dynamic json) {
    final map = <String, DuplicateDetectionConfig>{};
    if (json is Map && json.isNotEmpty) {
      json = json.cast<String, dynamic>(); // ignore: parameter_assignments
      for (final entry in json.entries) {
        final value = DuplicateDetectionConfig.fromJson(entry.value);
        if (value != null) {
          map[entry.key] = value;
        }
      }
    }
    return map;
  }

  // maps a json object with a list of DuplicateDetectionConfig-objects as value to a dart map
  static Map<String, List<DuplicateDetectionConfig>> mapListFromJson(dynamic json, {bool growable = false,}) {
    final map = <String, List<DuplicateDetectionConfig>>{};
    if (json is Map && json.isNotEmpty) {
      // ignore: parameter_assignments
      json = json.cast<String, dynamic>();
      for (final entry in json.entries) {
        // Each value is parsed with [listFromJson], so a non-list value
        // becomes an empty list rather than being skipped.
        map[entry.key] = DuplicateDetectionConfig.listFromJson(entry.value, growable: growable,);
      }
    }
    return map;
  }

  /// The list of required keys that must be present in a JSON.
  static const requiredKeys = <String>{
    'enabled',
    'maxDistance',
  };
}

View file

@ -29,6 +29,7 @@ class JobName {
static const faceDetection = JobName._(r'faceDetection');
static const facialRecognition = JobName._(r'facialRecognition');
static const smartSearch = JobName._(r'smartSearch');
static const duplicateDetection = JobName._(r'duplicateDetection');
static const backgroundTask = JobName._(r'backgroundTask');
static const storageTemplateMigration = JobName._(r'storageTemplateMigration');
static const migration = JobName._(r'migration');
@ -45,6 +46,7 @@ class JobName {
faceDetection,
facialRecognition,
smartSearch,
duplicateDetection,
backgroundTask,
storageTemplateMigration,
migration,
@ -96,6 +98,7 @@ class JobNameTypeTransformer {
case r'faceDetection': return JobName.faceDetection;
case r'facialRecognition': return JobName.facialRecognition;
case r'smartSearch': return JobName.smartSearch;
case r'duplicateDetection': return JobName.duplicateDetection;
case r'backgroundTask': return JobName.backgroundTask;
case r'storageTemplateMigration': return JobName.storageTemplateMigration;
case r'migration': return JobName.migration;

View file

@ -14,6 +14,7 @@ class ServerFeaturesDto {
/// Returns a new [ServerFeaturesDto] instance.
ServerFeaturesDto({
required this.configFile,
required this.duplicateDetection,
required this.email,
required this.facialRecognition,
required this.map,
@ -29,6 +30,8 @@ class ServerFeaturesDto {
bool configFile;
bool duplicateDetection;
bool email;
bool facialRecognition;
@ -54,6 +57,7 @@ class ServerFeaturesDto {
@override
bool operator ==(Object other) => identical(this, other) || other is ServerFeaturesDto &&
other.configFile == configFile &&
other.duplicateDetection == duplicateDetection &&
other.email == email &&
other.facialRecognition == facialRecognition &&
other.map == map &&
@ -70,6 +74,7 @@ class ServerFeaturesDto {
int get hashCode =>
// ignore: unnecessary_parenthesis
(configFile.hashCode) +
(duplicateDetection.hashCode) +
(email.hashCode) +
(facialRecognition.hashCode) +
(map.hashCode) +
@ -83,11 +88,12 @@ class ServerFeaturesDto {
(trash.hashCode);
@override
String toString() => 'ServerFeaturesDto[configFile=$configFile, email=$email, facialRecognition=$facialRecognition, map=$map, oauth=$oauth, oauthAutoLaunch=$oauthAutoLaunch, passwordLogin=$passwordLogin, reverseGeocoding=$reverseGeocoding, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, trash=$trash]';
String toString() => 'ServerFeaturesDto[configFile=$configFile, duplicateDetection=$duplicateDetection, email=$email, facialRecognition=$facialRecognition, map=$map, oauth=$oauth, oauthAutoLaunch=$oauthAutoLaunch, passwordLogin=$passwordLogin, reverseGeocoding=$reverseGeocoding, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, trash=$trash]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
json[r'configFile'] = this.configFile;
json[r'duplicateDetection'] = this.duplicateDetection;
json[r'email'] = this.email;
json[r'facialRecognition'] = this.facialRecognition;
json[r'map'] = this.map;
@ -111,6 +117,7 @@ class ServerFeaturesDto {
return ServerFeaturesDto(
configFile: mapValueOfType<bool>(json, r'configFile')!,
duplicateDetection: mapValueOfType<bool>(json, r'duplicateDetection')!,
email: mapValueOfType<bool>(json, r'email')!,
facialRecognition: mapValueOfType<bool>(json, r'facialRecognition')!,
map: mapValueOfType<bool>(json, r'map')!,
@ -170,6 +177,7 @@ class ServerFeaturesDto {
/// The list of required keys that must be present in a JSON.
static const requiredKeys = <String>{
'configFile',
'duplicateDetection',
'email',
'facialRecognition',
'map',

View file

@ -14,6 +14,7 @@ class SystemConfigMachineLearningDto {
/// Returns a new [SystemConfigMachineLearningDto] instance.
SystemConfigMachineLearningDto({
required this.clip,
required this.duplicateDetection,
required this.enabled,
required this.facialRecognition,
required this.url,
@ -21,6 +22,8 @@ class SystemConfigMachineLearningDto {
CLIPConfig clip;
DuplicateDetectionConfig duplicateDetection;
bool enabled;
RecognitionConfig facialRecognition;
@ -30,6 +33,7 @@ class SystemConfigMachineLearningDto {
@override
bool operator ==(Object other) => identical(this, other) || other is SystemConfigMachineLearningDto &&
other.clip == clip &&
other.duplicateDetection == duplicateDetection &&
other.enabled == enabled &&
other.facialRecognition == facialRecognition &&
other.url == url;
@ -38,16 +42,18 @@ class SystemConfigMachineLearningDto {
int get hashCode =>
// ignore: unnecessary_parenthesis
(clip.hashCode) +
(duplicateDetection.hashCode) +
(enabled.hashCode) +
(facialRecognition.hashCode) +
(url.hashCode);
@override
String toString() => 'SystemConfigMachineLearningDto[clip=$clip, enabled=$enabled, facialRecognition=$facialRecognition, url=$url]';
String toString() => 'SystemConfigMachineLearningDto[clip=$clip, duplicateDetection=$duplicateDetection, enabled=$enabled, facialRecognition=$facialRecognition, url=$url]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
json[r'clip'] = this.clip;
json[r'duplicateDetection'] = this.duplicateDetection;
json[r'enabled'] = this.enabled;
json[r'facialRecognition'] = this.facialRecognition;
json[r'url'] = this.url;
@ -63,6 +69,7 @@ class SystemConfigMachineLearningDto {
return SystemConfigMachineLearningDto(
clip: CLIPConfig.fromJson(json[r'clip'])!,
duplicateDetection: DuplicateDetectionConfig.fromJson(json[r'duplicateDetection'])!,
enabled: mapValueOfType<bool>(json, r'enabled')!,
facialRecognition: RecognitionConfig.fromJson(json[r'facialRecognition'])!,
url: mapValueOfType<String>(json, r'url')!,
@ -114,6 +121,7 @@ class SystemConfigMachineLearningDto {
/// The list of required keys that must be present in a JSON.
static const requiredKeys = <String>{
'clip',
'duplicateDetection',
'enabled',
'facialRecognition',
'url',