mirror of
https://github.com/immich-app/immich
synced 2025-10-17 18:19:27 +00:00
Add additional dedupe logic
This commit is contained in:
parent
53680d9643
commit
76109c5c6b
2 changed files with 98 additions and 12 deletions
|
|
@ -11,6 +11,22 @@ describe('choosing a duplicate', () => {
|
|||
expect(suggestDuplicate(assets as AssetResponseDto[])).toEqual(assets[0]);
|
||||
});
|
||||
|
||||
it('prefers DNG over CR2 even when the DNG is smaller', () => {
  // On file size alone the CR2 would win; the format preference has to outrank it.
  const cr2 = { originalMimeType: 'image/x-canon-cr2', exifInfo: { fileSizeInByte: 500 } };
  const dng = { originalMimeType: 'image/x-adobe-dng', exifInfo: { fileSizeInByte: 200 } };

  const suggestion = suggestDuplicate([cr2, dng] as AssetResponseDto[]);

  expect(suggestion).toEqual(dng);
});
|
||||
|
||||
it('prefers HEIC over JPEG even when the HEIC is smaller', () => {
  // On file size alone the JPEG would win; the format preference has to outrank it.
  const jpeg = { originalMimeType: 'image/jpeg', exifInfo: { fileSizeInByte: 400 } };
  const heic = { originalMimeType: 'image/heic', exifInfo: { fileSizeInByte: 150 } };

  const suggestion = suggestDuplicate([jpeg, heic] as AssetResponseDto[]);

  expect(suggestion).toEqual(heic);
});
|
||||
|
||||
it('picks the asset with the most exif data if multiple assets have the same file size', () => {
|
||||
const assets = [
|
||||
{ exifInfo: { fileSizeInByte: 200, rating: 5, fNumber: 1 } },
|
||||
|
|
|
|||
|
|
@ -2,10 +2,83 @@ import { getExifCount } from '$lib/utils/exif-utils';
|
|||
import type { AssetResponseDto } from '@immich/sdk';
|
||||
import { sortBy } from 'lodash-es';
|
||||
|
||||
/**
 * Original-file formats ordered from most to least preferred.
 *
 * Each inner array is one preference group; every entry is either a lowercase
 * mime type or a bare lowercase file extension. The index of a group is its
 * priority, so a lower index means a more preferred format.
 */
const formatPreferenceGroups: string[][] = [
  // DNG
  ['image/x-adobe-dng', 'image/dng', 'dng'],
  // Camera-vendor RAW formats: mime types first, then the matching extensions
  [
    'image/x-canon-cr3',
    'image/x-canon-cr2',
    'image/x-nikon-nef',
    'image/x-sony-arw',
    'image/x-olympus-orf',
    'image/x-fuji-raf',
    'image/x-panasonic-rw2',
    'image/x-panasonic-raw',
    'image/x-pentax-pef',
    'image/x-samsung-srw',
    'cr3',
    'cr2',
    'nef',
    'arw',
    'orf',
    'raf',
    'rw2',
    'raw',
    'pef',
    'srw',
  ],
  // HEIC / HEIF
  ['image/heic', 'image/heif', 'heic', 'heif'],
  // AVIF
  ['image/avif', 'avif'],
  // JPEG
  ['image/jpeg', 'image/jpg', 'jpeg', 'jpg'],
];

/** Priority for formats that appear in no group: one past the last group index. */
const DEFAULT_FORMAT_PRIORITY = formatPreferenceGroups.length;

/** Maps every listed mime type / extension to the index of the group it belongs to. */
const formatPriorityLookup = new Map<string, number>();
for (const [priority, formats] of formatPreferenceGroups.entries()) {
  for (const format of formats) {
    formatPriorityLookup.set(format, priority);
  }
}
|
||||
|
||||
/**
 * Extracts the lowercase file extension (without the leading dot) from a file
 * name or path.
 *
 * Only a dot inside the final path segment counts, so a dot in a directory
 * name is not mistaken for the start of an extension.
 *
 * @param path File name or path; both `/` and `\` are treated as separators.
 * @returns The lowercase extension, or `undefined` when `path` is empty or
 *   missing, the final segment contains no dot, or the path ends with a dot.
 */
const getExtension = (path?: string): string | undefined => {
  if (!path) {
    return undefined;
  }

  const separatorIndex = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
  const dotIndex = path.lastIndexOf('.');

  // `dotIndex <= separatorIndex` covers both "no dot at all" (-1 <= -1) and
  // "the last dot belongs to a directory name rather than the file name".
  if (dotIndex <= separatorIndex || dotIndex === path.length - 1) {
    return undefined;
  }

  return path.slice(dotIndex + 1).toLowerCase();
};
|
||||
|
||||
/**
 * Resolves the format priority of an asset.
 *
 * Identifiers are tried in order: the lowercased original mime type, the
 * extension of the original file name, then the extension of the original
 * path. The first one found in `formatPriorityLookup` decides the result.
 *
 * @param asset Asset whose original format should be ranked.
 * @returns The priority of the first recognised identifier, or
 *   `DEFAULT_FORMAT_PRIORITY` when none is recognised.
 */
const getAssetFormatPriority = (asset: AssetResponseDto) => {
  const identifiers = [
    asset.originalMimeType?.toLowerCase(),
    getExtension(asset.originalFileName),
    getExtension(asset.originalPath),
  ];

  const firstKnownPriority = identifiers
    // Missing or empty identifiers can never match, so they map to "no priority".
    .map((identifier) => (identifier ? formatPriorityLookup.get(identifier) : undefined))
    .find((priority) => priority !== undefined);

  return firstKnownPriority ?? DEFAULT_FORMAT_PRIORITY;
};
|
||||
|
||||
/**
|
||||
* Suggests the best duplicate asset to keep from a list of duplicates.
|
||||
*
|
||||
* The best asset is determined by the following criteria:
|
||||
* - Preferred original file format (based on mime type or extension)
|
||||
* - Largest image file size in bytes
|
||||
* - Largest count of exif data
|
||||
*
|
||||
|
|
@ -13,18 +86,15 @@ import { sortBy } from 'lodash-es';
|
|||
* @returns The best asset to keep
|
||||
*/
|
||||
export const suggestDuplicate = (assets: AssetResponseDto[]): AssetResponseDto | undefined => {
  // Nothing to suggest for an empty group of duplicates.
  if (assets.length === 0) {
    return undefined;
  }

  // sortBy orders ascending and is stable, so the best candidate ends up first:
  //   1. lowest format priority value (most preferred original format),
  //   2. largest file size (negated so larger sorts earlier; missing size counts as 0),
  //   3. largest exif count (negated for the same reason).
  // Assets that tie on all three criteria keep their input order.
  const sorted = sortBy(assets, [
    (asset) => getAssetFormatPriority(asset),
    (asset) => -(asset.exifInfo?.fileSizeInByte ?? 0),
    (asset) => -getExifCount(asset),
  ]);

  return sorted[0];
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue